journal-file.c revision 0f99f74a14ef193c1ebde687c5cc76e1d67b85ef
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2011 Lennart Poettering
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <stddef.h>
#include "journal-def.h"
#include "journal-file.h"
#include "journal-authenticate.h"
#include "lookup3.h"
#include "compress.h"
#include "fsprg.h"
#define COMPRESSION_SIZE_THRESHOLD (512ULL)
/* This is the minimum journal file size */
/* These are the lower and upper bounds if we deduce the max_use value
* from the file system size */
/* This is the upper bound if we deduce max_size from max_use */
/* This is the upper bound if we deduce the keep_free value from the
* file system size */
/* This is the keep_free value when we can't determine the system
* size */
/* n_data was the first entry we added after the initial file format design */
/* How many entries to keep in the entry array chain cache at max */
#define CHAIN_CACHE_MAX 20
/* How much to increase the journal file size at once each time we allocate something new. */
/* Brings the journal file into the online state (going by the name; the
 * statements that actually change the header state are not visible in this
 * copy of the file).
 *
 * Returns -EPERM when the file was not opened writable. The visible case
 * labels return 0 for STATE_ONLINE and STATE_OFFLINE and -EINVAL for any
 * other value.
 *
 * NOTE(review): the switch header these case labels belong to is missing
 * here and an unconditional "return -EINVAL" precedes them, so this block
 * looks truncated -- confirm against the upstream revision. */
static int journal_file_set_online(JournalFile *f) {
assert(f);
/* Files that were not opened for writing are refused */
if (!f->writable)
return -EPERM;
return -EINVAL;
case STATE_ONLINE:
return 0;
case STATE_OFFLINE:
return 0;
default:
return -EINVAL;
}
}
/* Counterpart of journal_file_set_online() (going by the name; the
 * statements that change the header state are not visible in this copy).
 *
 * Returns -EPERM when the file was not opened writable.
 *
 * NOTE(review): as written, every call on a writable file returns -EINVAL
 * and the two trailing "return 0" statements are unreachable. The conditions
 * guarding these returns appear to be missing -- confirm against upstream. */
int journal_file_set_offline(JournalFile *f) {
assert(f);
/* Files that were not opened for writing are refused */
if (!f->writable)
return -EPERM;
return -EINVAL;
return 0;
return 0;
}
/* Tears down a JournalFile and frees it.
 *
 * Visible steps: close the file descriptor with safe_close(), drop the
 * reference on the mmap cache if one is attached, free the compression
 * buffer, release the FSPRG state/seed and the gcrypt HMAC handle (only when
 * built with HAVE_GCRYPT), and finally free the object itself. The pointer
 * must not be used after this call.
 *
 * NOTE(review): several statements are missing in this copy: the body of the
 * "final tag" section, whatever "if (f->header)" originally guarded, the
 * "if (f->fss_file)" arm, and the #if matching the first stray #endif.
 * Confirm against the upstream revision. */
void journal_file_close(JournalFile *f) {
assert(f);
#ifdef HAVE_GCRYPT
/* Write the final tag */
#endif
/* Sync everything to disk, before we mark the file offline */
if (f->header)
safe_close(f->fd);
/* The mmap cache is reference counted; only drop our reference */
if (f->mmap)
mmap_cache_unref(f->mmap);
free(f->compress_buffer);
#endif
#ifdef HAVE_GCRYPT
if (f->fss_file)
else if (f->fsprg_state)
free(f->fsprg_state);
free(f->fsprg_seed);
if (f->hmac)
gcry_md_close(f->hmac);
#endif
free(f);
}
Header h = {};
ssize_t k;
int r;
assert(f);
h.incompatible_flags |= htole32(
h.compatible_flags = htole32(
f->seal * HEADER_COMPATIBLE_SEALED);
r = sd_id128_randomize(&h.file_id);
if (r < 0)
return r;
if (template) {
} else
if (k < 0)
return -errno;
if (k != sizeof(h))
return -EIO;
return 0;
}
/* Refreshes the header of a journal file.
 *
 * Visible steps: query the current boot ID via sd_id128_get_boot(), then mark
 * the tail entry's monotonic timestamp as valid. Returns 0 on success or the
 * negative error of the failing step.
 *
 * NOTE(review): in this copy "r" is tested before it is assigned, "boot_id"
 * has no visible declaration, and the statement behind the "Sync the online
 * state to disk" comment is absent -- lines appear to be missing. */
static int journal_file_refresh_header(JournalFile *f) {
int r;
assert(f);
if (r < 0)
return r;
r = sd_id128_get_boot(&boot_id);
if (r < 0)
return r;
f->tail_entry_monotonic_valid = true;
/* Sync the online state to disk */
return 0;
}
/* Validates the on-disk header of an opened journal file.
 *
 * Returns 0 if the header is acceptable, otherwise a negative errno. The
 * error codes visible here, with the meaning callers in this file attach to
 * them:
 *   -EBADMSG         header is corrupted
 *   -EPROTONOSUPPORT header carries flags this build does not know
 *   -ENODATA         file is truncated: header_size + arena_size exceeds the
 *                    stat()ed file size, or tail_object_offset points past
 *                    header + arena
 *   -EHOSTDOWN       file belongs to another machine (writable files only)
 *   -EBUSY           state is STATE_ONLINE, or an unknown state
 *                    (writable files only)
 *   -ESHUTDOWN       state is STATE_ARCHIVED (writable files only)
 *
 * NOTE(review): many conditions are missing in this copy (several bare
 * "return" statements, the declarations of "flags", "state" and
 * "machine_id", and an unbalanced closing brace). Treat the control flow
 * below as incomplete and confirm against the upstream revision. */
static int journal_file_verify_header(JournalFile *f) {
assert(f);
return -EBADMSG;
/* In both read and write mode we refuse to open files with
* incompatible flags we don't know */
if (flags & ~HEADER_INCOMPATIBLE_SUPPORTED) {
if (flags & ~HEADER_INCOMPATIBLE_ANY)
if (flags)
return -EPROTONOSUPPORT;
}
/* When open for writing we refuse to open files with
* compatible flags, too */
if (flags & ~HEADER_COMPATIBLE_ANY)
if (flags)
return -EPROTONOSUPPORT;
}
return -EBADMSG;
/* The first addition was n_data, so check that we are at least this large */
return -EBADMSG;
return -EBADMSG;
/* Header plus arena must fit inside the file as last stat()ed */
if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
return -ENODATA;
/* The tail object must lie inside header plus arena */
if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
return -ENODATA;
return -ENODATA;
/* The remaining checks only apply when the file is opened for writing */
if (f->writable) {
int r;
r = sd_id128_get_machine(&machine_id);
if (r < 0)
return r;
return -EHOSTDOWN;
if (state == STATE_ONLINE) {
return -EBUSY;
} else if (state == STATE_ARCHIVED)
return -ESHUTDOWN;
else if (state != STATE_OFFLINE) {
return -EBUSY;
}
}
return 0;
}
int r;
assert(f);
/* We assume that this file is not sparse, and we know that
* for sure, since we always call posix_fallocate()
* ourselves */
old_size =
return 0;
return -E2BIG;
else
available = 0;
return -E2BIG;
}
}
/* Increase by larger blocks at once */
/* Note that the glibc fallocate() fallback is very
inefficient, hence we try to minimize the allocation area
as we can. */
if (r != 0)
return -r;
return -errno;
return 0;
}
/* Makes "size" bytes of the file starting at "offset" accessible through the
 * mmap cache and stores the resulting pointer in *ret.
 *
 * "context" and "keep_always" are passed through unchanged to
 * mmap_cache_get(), together with the file's fd, protection flags and last
 * stat() data.
 *
 * Returns -EINVAL for a zero size, -EADDRNOTAVAIL from the range check, and
 * otherwise whatever mmap_cache_get() returns.
 *
 * NOTE(review): the condition of the out-of-range check (and the fstat()
 * refresh the comment mentions) is missing in this copy; as written the
 * -EADDRNOTAVAIL return is unconditional and the brace after it is
 * unbalanced. Confirm against the upstream revision. */
static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
assert(f);
/* size is unsigned, so this rejects exactly size == 0 */
if (size <= 0)
return -EINVAL;
/* Avoid SIGBUS on invalid accesses */
/* Hmm, out of range? Let's refresh the fstat() data
* first, before we trust that check. */
return -EADDRNOTAVAIL;
}
return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
}
[OBJECT_DATA] = sizeof(DataObject),
[OBJECT_FIELD] = sizeof(FieldObject),
[OBJECT_ENTRY] = sizeof(EntryObject),
[OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
[OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
[OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
[OBJECT_TAG] = sizeof(TagObject),
};
return sizeof(ObjectHeader);
}
int r;
void *t;
Object *o;
uint64_t s;
assert(f);
/* Objects may only be located at multiples of 64 bits */
return -EFAULT;
if (r < 0)
return r;
o = (Object*) t;
if (s < sizeof(ObjectHeader))
return -EBADMSG;
return -EBADMSG;
if (s < minimum_header_size(o))
return -EBADMSG;
return -EBADMSG;
if (s > sizeof(ObjectHeader)) {
if (r < 0)
return r;
o = (Object*) t;
}
*ret = o;
return 0;
}
uint64_t r;
assert(f);
if (seqnum) {
/* If an external seqnum counter was passed, we update
* both the local and the external one, and set it to
* the maximum of both */
if (*seqnum + 1 > r)
r = *seqnum + 1;
*seqnum = r;
}
if (f->header->head_entry_seqnum == 0)
return r;
}
/* Appends a new object of the given type and size to the journal file.
 *
 * The file is first switched online via journal_file_set_online(), then
 * space for "size" bytes at position p is reserved with
 * journal_file_allocate(). On success returns 0 and stores the new object in
 * *ret and its file offset in *offset; both output pointers are written
 * unconditionally, so neither may be NULL. On failure returns the negative
 * error of the failing step.
 *
 * NOTE(review): the computation of "p", the mapping call that fills "t", the
 * declaration of "o" and the object/header initialisation are not visible in
 * this copy -- confirm against the upstream revision. */
int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
int r;
uint64_t p;
void *t;
assert(f);
r = journal_file_set_online(f);
if (r < 0)
return r;
if (p == 0)
else {
if (r < 0)
return r;
}
r = journal_file_allocate(f, p, size);
if (r < 0)
return r;
if (r < 0)
return r;
o = (Object*) t;
*ret = o;
*offset = p;
return 0;
}
/* Creates the data hash table object in a journal file by appending it with
 * journal_file_append_object(). Returns 0 on success or the negative error
 * from the append.
 *
 * NOTE(review): the computation of the table size "s", the clamp applied
 * when it is below DEFAULT_DATA_HASH_TABLE_SIZE, the type/size arguments of
 * the append call and the header updates are missing in this copy. As
 * written, "s" is read uninitialised -- confirm against upstream. */
static int journal_file_setup_data_hash_table(JournalFile *f) {
uint64_t s, p;
Object *o;
int r;
assert(f);
/* We estimate that we need 1 hash table entry per 768 of
journal file and we want to make sure we never get beyond
75% fill level. Calculate the hash table size for the
maximum file size based on these metrics. */
if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
r = journal_file_append_object(f,
&o, &p);
if (r < 0)
return r;
return 0;
}
/* Creates the field hash table object in a journal file by appending it with
 * journal_file_append_object(). Returns 0 on success or the negative error
 * from the append.
 *
 * NOTE(review): the size assignment, the type/size arguments of the append
 * call and the header updates are missing in this copy ("s" is declared but
 * never used) -- confirm against the upstream revision. */
static int journal_file_setup_field_hash_table(JournalFile *f) {
uint64_t s, p;
Object *o;
int r;
assert(f);
/* We use a fixed size hash table for the fields as this
* number should grow very slowly only */
r = journal_file_append_object(f,
&o, &p);
if (r < 0)
return r;
return 0;
}
/* Maps the data hash table into memory and caches the pointer in
 * f->data_hash_table. The mapping is requested with keep_always set to true.
 * Returns 0 on success or the negative error from journal_file_move_to().
 *
 * NOTE(review): the assignments of offset "p" and size "s" (and the context
 * argument of the call) are missing in this copy, so both are read
 * uninitialised as written -- confirm against the upstream revision. */
static int journal_file_map_data_hash_table(JournalFile *f) {
uint64_t s, p;
void *t;
int r;
assert(f);
r = journal_file_move_to(f,
true,
p, s,
&t);
if (r < 0)
return r;
f->data_hash_table = t;
return 0;
}
/* Maps the field hash table into memory and caches the pointer in
 * f->field_hash_table. The mapping is requested with keep_always set to
 * true. Returns 0 on success or the negative error from
 * journal_file_move_to().
 *
 * NOTE(review): the assignments of offset "p" and size "s" (and the context
 * argument of the call) are missing in this copy, so both are read
 * uninitialised as written -- confirm against the upstream revision. */
static int journal_file_map_field_hash_table(JournalFile *f) {
uint64_t s, p;
void *t;
int r;
assert(f);
r = journal_file_move_to(f,
true,
p, s,
&t);
if (r < 0)
return r;
f->field_hash_table = t;
return 0;
}
static int journal_file_link_field(
JournalFile *f,
Object *o,
uint64_t p, h;
int r;
assert(f);
assert(o);
return -EINVAL;
/* This might alter the window we are looking at */
if (p == 0)
else {
r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
if (r < 0)
return r;
}
return 0;
}
static int journal_file_link_data(
JournalFile *f,
Object *o,
uint64_t p, h;
int r;
assert(f);
assert(o);
return -EINVAL;
/* This might alter the window we are looking at */
if (p == 0)
/* Only entry in the hash table is easy */
else {
/* Move back to the previous data object, to patch in
* pointer */
r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
if (r < 0)
return r;
}
return 0;
}
JournalFile *f,
int r;
assert(f);
if (f->header->field_hash_table_size == 0)
return -EBADMSG;
while (p > 0) {
Object *o;
r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
if (r < 0)
return r;
if (ret)
*ret = o;
if (offset)
*offset = p;
return 1;
}
}
return 0;
}
JournalFile *f,
assert(f);
return journal_file_find_field_object_with_hash(f,
}
JournalFile *f,
int r;
assert(f);
if (f->header->data_hash_table_size == 0)
return -EBADMSG;
while (p > 0) {
Object *o;
r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
if (r < 0)
return r;
goto next;
uint64_t l;
return -EBADMSG;
if (r < 0)
return r;
if (ret)
*ret = o;
if (offset)
*offset = p;
return 1;
}
#else
return -EPROTONOSUPPORT;
#endif
if (ret)
*ret = o;
if (offset)
*offset = p;
return 1;
}
next:
}
return 0;
}
JournalFile *f,
assert(f);
return journal_file_find_data_object_with_hash(f,
}
static int journal_file_append_field(
JournalFile *f,
Object *o;
int r;
assert(f);
if (r < 0)
return r;
else if (r > 0) {
if (ret)
*ret = o;
if (offset)
*offset = p;
return 0;
}
if (r < 0)
return r;
r = journal_file_link_field(f, o, p, hash);
if (r < 0)
return r;
/* The linking might have altered the window, so let's
* refresh our pointer */
r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
if (r < 0)
return r;
#ifdef HAVE_GCRYPT
r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
if (r < 0)
return r;
#endif
if (ret)
*ret = o;
if (offset)
*offset = p;
return 0;
}
static int journal_file_append_data(
JournalFile *f,
Object *o;
int r, compression = 0;
const void *eq;
assert(f);
if (r < 0)
return r;
else if (r > 0) {
if (ret)
*ret = o;
if (offset)
*offset = p;
return 0;
}
if (r < 0)
return r;
if (f->compress_xz &&
if (compression) {
}
}
#endif
if (!compression && size > 0)
r = journal_file_link_data(f, o, p, hash);
if (r < 0)
return r;
/* The linking might have altered the window, so let's
* refresh our pointer */
r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
if (r < 0)
return r;
if (!data)
else
/* Create field object ... */
if (r < 0)
return r;
/* ... and link it in. */
}
#ifdef HAVE_GCRYPT
r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
if (r < 0)
return r;
#endif
if (ret)
*ret = o;
if (offset)
*offset = p;
return 0;
}
assert(o);
return 0;
}
assert(o);
return 0;
}
assert(o);
return 0;
}
/* Records entry offset "p" in a chain of entry array objects.
 *
 * Visible logic: walk the chain starting at array offset "a"; for each array
 * object, if the remaining index "i" falls inside it (i < n) the function
 * returns 0 from there, otherwise "i" is reduced by the array's item count
 * and the array is remembered in "ap" as the predecessor. If the chain is
 * exhausted, a new array is sized (doubling "n", with a minimum of 4 items),
 * appended, fed to the HMAC when built with HAVE_GCRYPT, and hooked in
 * either as the first array (ap == 0) or behind "ap".
 *
 * Returns 0 on success or a negative errno from the failing step.
 *
 * NOTE(review): the "first"/"idx" parameters, the declarations of a, ap, n,
 * i, q and hidx, the store into the array slot, the step to the next array
 * and the append call itself are missing in this copy -- confirm against the
 * upstream revision. */
static int link_entry_into_array(JournalFile *f,
uint64_t p) {
int r;
Object *o;
assert(f);
assert(p > 0);
while (a > 0) {
r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
if (r < 0)
return r;
n = journal_file_entry_array_n_items(o);
if (i < n) {
return 0;
}
/* Not in this array: make the index relative to the next one */
i -= n;
ap = a;
}
if (hidx > n)
else
n = n * 2;
/* Never create arrays with fewer than four slots */
if (n < 4)
n = 4;
&o, &q);
if (r < 0)
return r;
#ifdef HAVE_GCRYPT
r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
if (r < 0)
return r;
#endif
if (ap == 0)
else {
if (r < 0)
return r;
}
return 0;
}
/* Variant of link_entry_into_array() for lists that treat their first
 * element specially: when *idx is 0 a separate branch is taken, otherwise
 * the entry is passed on to link_entry_into_array() with a local index "i".
 * Returns 0 on success or the negative error from link_entry_into_array().
 *
 * NOTE(review): the "extra", "first" and "idx" parameters, the body of the
 * "*idx == 0" branch, the initialisation of "i" and the final update of *idx
 * are missing in this copy -- confirm against the upstream revision. */
static int link_entry_into_array_plus_one(JournalFile *f,
uint64_t p) {
int r;
assert(f);
assert(p > 0);
if (*idx == 0)
else {
le64_t i;
r = link_entry_into_array(f, first, &i, p);
if (r < 0)
return r;
}
return 0;
}
uint64_t p;
int r;
assert(f);
assert(o);
if (p == 0)
return -EINVAL;
r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
if (r < 0)
return r;
return link_entry_into_array_plus_one(f,
&o->data.entry_offset,
&o->data.entry_array_offset,
offset);
}
uint64_t n, i;
int r;
assert(f);
assert(o);
return -EINVAL;
/* Link up the entry itself */
r = link_entry_into_array(f,
&f->header->entry_array_offset,
offset);
if (r < 0)
return r;
/* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
if (f->header->head_entry_realtime == 0)
f->tail_entry_monotonic_valid = true;
/* Link up the items */
n = journal_file_entry_n_items(o);
for (i = 0; i < n; i++) {
r = journal_file_link_entry_item(f, o, offset, i);
if (r < 0)
return r;
}
return 0;
}
static int journal_file_append_entry_internal(
JournalFile *f,
const dual_timestamp *ts,
Object *o;
int r;
assert(f);
if (r < 0)
return r;
#ifdef HAVE_GCRYPT
if (r < 0)
return r;
#endif
r = journal_file_link_entry(f, o, np);
if (r < 0)
return r;
if (ret)
*ret = o;
if (offset)
return 0;
}
/* Notifies inotify watchers that the journal file has been modified.
 *
 * Journal writes go through mmap(), which does not generate IN_MODIFY
 * events. To wake up watchers anyway, the file is truncated to the size it
 * already has: the contents stay untouched but the kernel emits IN_MODIFY.
 *
 * The size is taken from f->last_stat, the cached stat data this file also
 * uses for its range checks. Failure is only logged; the function has no
 * return value and callers are not expected to react to it. */
void journal_file_post_change(JournalFile *f) {
        assert(f);

        /* inotify() does not receive IN_MODIFY events from file
         * accesses done via mmap(). After each access we hence
         * trigger IN_MODIFY by truncating the journal file to its
         * current size. */
        if (ftruncate(f->fd, f->last_stat.st_size) < 0)
                log_error("Failed to truncate file to its own size: %m");
}
return -1;
return 1;
return 0;
}
/* Appends a new entry made up of n_iovec data items to the journal file.
 *
 * "ts" may be NULL, in which case a local timestamp (_ts) is meant to be
 * used instead. The function rejects the call with -EINVAL in connection
 * with the tail entry's monotonic timestamp (only the first half of that
 * condition is visible). Each iovec element is processed in turn and any
 * negative error aborts the call. The final result "r" is returned as is.
 *
 * NOTE(review): the timestamp fill-in, the rest of the -EINVAL condition,
 * the HMAC call, the allocation of the item array, the per-item append, the
 * sort and the call that writes the entry are missing in this copy --
 * confirm against the upstream revision. */
int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
unsigned i;
int r;
struct dual_timestamp _ts;
assert(f);
if (!ts) {
}
if (f->tail_entry_monotonic_valid &&
return -EINVAL;
#ifdef HAVE_GCRYPT
if (r < 0)
return r;
#endif
/* alloca() can't take 0, hence let's allocate at least one */
for (i = 0; i < n_iovec; i++) {
uint64_t p;
Object *o;
if (r < 0)
return r;
}
/* Order by the position on disk, in order to improve seek
* times for rotating media. */
return r;
}
typedef struct ChainCacheItem {
static void chain_cache_put(
Hashmap *h,
if (!ci) {
/* If the chain item to cache for this chain is the
* first one it's not worth caching anything */
return;
if (hashmap_size(h) >= CHAIN_CACHE_MAX)
ci = hashmap_steal_first(h);
else {
if (!ci)
return;
}
return;
}
} else
}
static int generic_array_get(
JournalFile *f,
uint64_t i,
Object *o;
uint64_t p = 0, a, t = 0;
int r;
assert(f);
a = first;
/* Try the chain cache first */
}
while (a > 0) {
uint64_t k;
r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
if (r < 0)
return r;
k = journal_file_entry_array_n_items(o);
if (i < k) {
goto found;
}
i -= k;
t += k;
}
return 0;
/* Let's cache this item for the next invocation */
r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
if (r < 0)
return r;
if (ret)
*ret = o;
if (offset)
*offset = p;
return 1;
}
static int generic_array_get_plus_one(
JournalFile *f,
uint64_t i,
Object *o;
assert(f);
if (i == 0) {
int r;
if (r < 0)
return r;
if (ret)
*ret = o;
if (offset)
return 1;
}
}
enum {
};
static int generic_array_bisect(
JournalFile *f,
uint64_t n,
bool subtract_one = false;
int r;
assert(f);
/* Start with the first array in the chain */
a = first;
/* Ah, we have iterated this bisection array chain
* previously! Let's see if we can skip ahead in the
* chain, as far as the last time. But we can't jump
* backwards in the chain, so let's check that
* first. */
if (r < 0)
return r;
if (r == TEST_LEFT) {
/* OK, what we are looking for is right of the
* begin of this EntryArray, so let's jump
* straight to previously cached array in the
* chain */
}
}
while (a > 0) {
if (r < 0)
return r;
if (right <= 0)
return 0;
i = right - 1;
if (p <= 0)
return -EBADMSG;
r = test_object(f, p, needle);
if (r < 0)
return r;
if (r == TEST_FOUND)
if (r == TEST_RIGHT) {
left = 0;
right -= 1;
/* If we cached the last index we
* looked at, let's try not to jump
* too wildly around and see if we can
* limit the range to look at early to
* the immediate neighbors of the last
* index we looked at. */
if (last_index > 0) {
if (p <= 0)
return -EBADMSG;
r = test_object(f, p, needle);
if (r < 0)
return r;
if (r == TEST_FOUND)
if (r == TEST_RIGHT)
right = x;
else
left = x + 1;
}
if (last_index < right) {
if (p <= 0)
return -EBADMSG;
r = test_object(f, p, needle);
if (r < 0)
return r;
if (r == TEST_FOUND)
if (r == TEST_RIGHT)
right = y;
else
left = y + 1;
}
}
for (;;) {
if (direction == DIRECTION_UP)
subtract_one = true;
i = left;
goto found;
}
if (p <= 0)
return -EBADMSG;
r = test_object(f, p, needle);
if (r < 0)
return r;
if (r == TEST_FOUND)
if (r == TEST_RIGHT)
right = i;
else
left = i + 1;
}
}
if (k > n) {
if (direction == DIRECTION_UP) {
i = n;
subtract_one = true;
goto found;
}
return 0;
}
n -= k;
t += k;
}
return 0;
if (subtract_one && t == 0 && i == 0)
return 0;
/* Let's cache this item for the next invocation */
chain_cache_put(f->chain_cache, ci, first, a, le64toh(array->entry_array.items[0]), t, subtract_one ? (i > 0 ? i-1 : (uint64_t) -1) : i);
if (subtract_one && i == 0)
p = last_p;
else if (subtract_one)
else
r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
if (r < 0)
return r;
if (ret)
*ret = o;
if (offset)
*offset = p;
if (idx)
return 1;
}
static int generic_array_bisect_plus_one(
JournalFile *f,
uint64_t n,
int r;
bool step_back = false;
Object *o;
assert(f);
if (n <= 0)
return 0;
/* This bisects the array in object 'first', but first checks
* an extra */
if (r < 0)
return r;
if (r == TEST_FOUND)
/* if we are looking with DIRECTION_UP then we need to first
see if in the actual array there is a matching entry, and
return the last one of that. But if there isn't any we need
to return this one. Hence remember this, and return it
below. */
if (r == TEST_LEFT)
if (r == TEST_RIGHT) {
if (direction == DIRECTION_DOWN)
goto found;
else
return 0;
}
if (r == 0 && step_back)
goto found;
if (r > 0 && idx)
(*idx) ++;
return r;
if (r < 0)
return r;
if (ret)
*ret = o;
if (offset)
if (idx)
*idx = 0;
return 1;
}
assert(f);
assert(p > 0);
if (p == needle)
return TEST_FOUND;
else if (p < needle)
return TEST_LEFT;
else
return TEST_RIGHT;
}
JournalFile *f,
uint64_t p,
return generic_array_bisect(f,
p,
}
Object *o;
int r;
assert(f);
assert(p > 0);
r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
if (r < 0)
return r;
return TEST_FOUND;
return TEST_LEFT;
else
return TEST_RIGHT;
}
JournalFile *f,
return generic_array_bisect(f,
}
Object *o;
int r;
assert(f);
assert(p > 0);
r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
if (r < 0)
return r;
return TEST_FOUND;
return TEST_LEFT;
else
return TEST_RIGHT;
}
JournalFile *f,
return generic_array_bisect(f,
}
Object *o;
int r;
assert(f);
assert(p > 0);
r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
if (r < 0)
return r;
return TEST_FOUND;
return TEST_LEFT;
else
return TEST_RIGHT;
}
static inline int find_data_object_by_boot_id(
JournalFile *f,
Object **o,
uint64_t *b) {
char t[sizeof("_BOOT_ID=")-1 + 32 + 1] = "_BOOT_ID=";
return journal_file_find_data_object(f, t, sizeof(t) - 1, o, b);
}
JournalFile *f,
Object *o;
int r;
assert(f);
if (r < 0)
return r;
if (r == 0)
return -ENOENT;
return generic_array_bisect_plus_one(f,
}
JournalFile *f,
int r;
assert(f);
assert(p > 0 || !o);
if (n <= 0)
return 0;
if (!o)
else {
return -EINVAL;
r = generic_array_bisect(f,
p,
&i);
if (r <= 0)
return r;
if (direction == DIRECTION_DOWN) {
if (i >= n - 1)
return 0;
i++;
} else {
if (i <= 0)
return 0;
i--;
}
}
/* And jump to it */
r = generic_array_get(f,
i,
if (r <= 0)
return r;
if (p > 0 &&
f->path, i);
return -EBADMSG;
}
if (offset)
return 1;
}
JournalFile *f,
uint64_t i, n;
int r;
assert(f);
assert(o);
assert(p > 0);
return -EINVAL;
r = generic_array_bisect(f,
p,
&i);
if (r <= 0)
return r;
/* Calculate new index */
if (skip < 0) {
i = 0;
else
} else
if (n <= 0)
return -EBADMSG;
if (i >= n)
i = n-1;
return generic_array_get(f,
i,
}
JournalFile *f,
uint64_t n, i;
int r;
Object *d;
assert(f);
assert(p > 0 || !o);
if (r < 0)
return r;
if (n <= 0)
return n;
if (!o)
else {
return -EINVAL;
p,
&i);
if (r <= 0)
return r;
if (direction == DIRECTION_DOWN) {
if (i >= n - 1)
return 0;
i++;
} else {
if (i <= 0)
return 0;
i--;
}
}
return generic_array_get_plus_one(f,
i,
}
JournalFile *f,
uint64_t p,
int r;
Object *d;
assert(f);
if (r < 0)
return r;
return generic_array_bisect_plus_one(f,
p,
}
JournalFile *f,
Object *o, *d;
int r;
uint64_t b, z;
assert(f);
/* First, seek by time */
r = find_data_object_by_boot_id(f, boot_id, &o, &b);
if (r < 0)
return r;
if (r == 0)
return -ENOENT;
if (r <= 0)
return r;
/* And now, continue seeking until we find an entry that
* exists in both bisection arrays */
for (;;) {
uint64_t p, q;
if (r < 0)
return r;
z,
if (r <= 0)
return r;
r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
if (r < 0)
return r;
p,
if (r <= 0)
return r;
if (p == q) {
if (ret)
if (offset)
*offset = q;
return 1;
}
z = q;
}
}
JournalFile *f,
Object *d;
int r;
assert(f);
if (r < 0)
return r;
return generic_array_bisect_plus_one(f,
}
JournalFile *f,
Object *d;
int r;
assert(f);
if (r < 0)
return r;
return generic_array_bisect_plus_one(f,
}
/* Debugging aid: walks the objects of a journal file and prints a
 * "Type: ..." line per object to stdout, followed by a "Flags: ..." line.
 * If an object cannot be mapped the walk stops and "File corrupt" is logged.
 *
 * NOTE(review): the initialisation of "p", the switch header, the printf
 * calls for OBJECT_ENTRY / OBJECT_TAG / unknown types, the arguments of the
 * "Flags" printf and the advance to the next object are missing in this
 * copy -- confirm against the upstream revision. */
void journal_file_dump(JournalFile *f) {
Object *o;
int r;
uint64_t p;
assert(f);
while (p != 0) {
/* Type -1 is passed here; presumably it accepts any object type -- TODO confirm */
r = journal_file_move_to_object(f, -1, p, &o);
if (r < 0)
goto fail;
case OBJECT_UNUSED:
printf("Type: OBJECT_UNUSED\n");
break;
case OBJECT_DATA:
printf("Type: OBJECT_DATA\n");
break;
case OBJECT_FIELD:
printf("Type: OBJECT_FIELD\n");
break;
case OBJECT_ENTRY:
break;
case OBJECT_FIELD_HASH_TABLE:
printf("Type: OBJECT_FIELD_HASH_TABLE\n");
break;
case OBJECT_DATA_HASH_TABLE:
printf("Type: OBJECT_DATA_HASH_TABLE\n");
break;
case OBJECT_ENTRY_ARRAY:
printf("Type: OBJECT_ENTRY_ARRAY\n");
break;
case OBJECT_TAG:
break;
default:
break;
}
printf("Flags: %s\n",
p = 0;
else
}
return;
fail:
log_error("File corrupt");
}
const char *x;
x = format_timestamp(buf, l, t);
if (x)
return x;
return " --- ";
}
/* Prints a human-readable summary of the journal file header to stdout:
 * path, the various 128-bit IDs, state, compatible/incompatible flags,
 * whether rotation is suggested, head/tail timestamps, and the fill level
 * (in percent) of the data and field hash tables.
 *
 * The fill level is n_data (resp. n_fields) divided by the number of
 * HashItem slots in the corresponding hash table.
 *
 * NOTE(review): most printf arguments, the remaining format lines and the
 * calls that introduce the two "Hash Table Fill" format strings are missing
 * in this copy, so the statement below is not complete -- confirm against
 * the upstream revision. */
void journal_file_print_header(JournalFile *f) {
char a[33], b[33], c[33], d[33];
char bytes[FORMAT_BYTES_MAX];
assert(f);
printf("File Path: %s\n"
"File ID: %s\n"
"Machine ID: %s\n"
"Boot ID: %s\n"
"Sequential Number ID: %s\n"
"State: %s\n"
"Compatible Flags:%s%s\n"
"Incompatible Flags:%s%s%s\n"
"Rotate Suggested: %s\n"
"Head Realtime Timestamp: %s\n"
"Tail Realtime Timestamp: %s\n"
"Tail Monotonic Timestamp: %s\n"
f->path,
yes_no(journal_file_rotate_suggested(f, 0)),
"Data Hash Table Fill: %.1f%%\n",
100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
"Field Hash Table Fill: %.1f%%\n",
100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
}
int journal_file_open(
const char *fname,
int flags,
bool compress,
bool seal,
JournalFile **ret) {
JournalFile *f;
int r;
bool newly_created = false;
return -EINVAL;
return -EINVAL;
if (!f)
return -ENOMEM;
f->fd = -1;
#if defined(HAVE_LZ4)
f->compress_lz4 = compress;
f->compress_xz = compress;
#endif
#ifdef HAVE_GCRYPT
#endif
if (mmap_cache)
else {
f->mmap = mmap_cache_new();
if (!f->mmap) {
r = -ENOMEM;
goto fail;
}
}
if (!f->path) {
r = -ENOMEM;
goto fail;
}
if (!f->chain_cache) {
r = -ENOMEM;
goto fail;
}
if (f->fd < 0) {
r = -errno;
goto fail;
}
r = -errno;
goto fail;
}
/* Let's attach the creation time to the journal file,
* so that the vacuuming code knows the age of this
* file even if the file might end up corrupted one
* day... Ideally we'd just use the creation time many
* file systems maintain for each file, but there is
* currently no usable API to query this, hence let's
* emulate this via extended attributes. If extended
* attributes are not supported we'll just skip this,
#ifdef HAVE_GCRYPT
/* Try to load the FSPRG state, and if we can't, then
* just don't do sealing */
if (f->seal) {
r = journal_file_fss_load(f);
if (r < 0)
f->seal = false;
}
#endif
r = journal_file_init_header(f, template);
if (r < 0)
goto fail;
r = -errno;
goto fail;
}
newly_created = true;
}
r = -EIO;
goto fail;
}
if (f->header == MAP_FAILED) {
r = -errno;
goto fail;
}
if (!newly_created) {
r = journal_file_verify_header(f);
if (r < 0)
goto fail;
}
#ifdef HAVE_GCRYPT
if (!newly_created && f->writable) {
r = journal_file_fss_load(f);
if (r < 0)
goto fail;
}
#endif
if (f->writable) {
if (metrics) {
} else if (template)
r = journal_file_refresh_header(f);
if (r < 0)
goto fail;
}
#ifdef HAVE_GCRYPT
r = journal_file_hmac_setup(f);
if (r < 0)
goto fail;
#endif
if (newly_created) {
if (r < 0)
goto fail;
if (r < 0)
goto fail;
#ifdef HAVE_GCRYPT
r = journal_file_append_first_tag(f);
if (r < 0)
goto fail;
#endif
}
if (r < 0)
goto fail;
r = journal_file_map_data_hash_table(f);
if (r < 0)
goto fail;
*ret = f;
return 0;
fail:
return r;
}
_cleanup_free_ char *p = NULL;
size_t l;
int r;
assert(f);
assert(*f);
old_file = *f;
return -EINVAL;
return -EINVAL;
if (r < 0)
return -ENOMEM;
if (r < 0)
return -errno;
r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
*f = new_file;
return r;
}
const char *fname,
int flags,
bool compress,
bool seal,
JournalFile **ret) {
int r;
size_t l;
_cleanup_free_ char *p = NULL;
if (r != -EBADMSG && /* corrupted */
r != -ENODATA && /* truncated */
r != -EHOSTDOWN && /* other machine */
r != -EPROTONOSUPPORT && /* incompatible feature */
r != -EBUSY && /* unclean shutdown */
r != -ESHUTDOWN /* already archived */)
return r;
return r;
return r;
return r;
/* The file is corrupted. Rotate it away and try it again (but only once) */
(int) l - 8, fname,
(unsigned long long) now(CLOCK_REALTIME),
random_u64()) < 0)
return -ENOMEM;
if (r < 0)
return -errno;
}
/* Copies the entry object "o" (located at offset "p" in "from") into the
 * journal file "to", item by item.
 *
 * Error codes visible in the body: -EPERM (condition not visible), -EBADMSG
 * for an inconsistent data object, -E2BIG when a payload length does not fit
 * into size_t (32-bit machines), -EPROTONOSUPPORT when a compressed payload
 * is met but the needed decompression support is compiled out, plus any
 * negative error propagated from the helper calls.
 *
 * NOTE(review): the -EPERM condition, the timestamp setup, the item array
 * allocation, all helper calls inside the loop, the compression #if and the
 * final append are missing in this copy, and the braces are unbalanced --
 * confirm against the upstream revision. */
int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
uint64_t i, n;
int r;
assert(o);
assert(p);
return -EPERM;
n = journal_file_entry_n_items(o);
/* alloca() can't take 0, hence let's allocate at least one */
for (i = 0; i < n; i++) {
uint64_t l, h;
size_t t;
void *data;
Object *u;
if (r < 0)
return r;
return -EBADMSG;
/* Round-trip through size_t to detect lengths that do not fit */
t = (size_t) l;
/* We hit the limit on 32bit machines */
if ((uint64_t) t != l)
return -E2BIG;
if (r < 0)
return r;
l = rsize;
#else
return -EPROTONOSUPPORT;
#endif
} else
if (r < 0)
return r;
if (r < 0)
return r;
}
}
assert(m);
if (fs_size > 0) {
if (m->max_use > DEFAULT_MAX_USE_UPPER)
m->max_use = DEFAULT_MAX_USE_UPPER;
if (m->max_use < DEFAULT_MAX_USE_LOWER)
m->max_use = DEFAULT_MAX_USE_LOWER;
} else
m->max_use = DEFAULT_MAX_USE_LOWER;
} else {
}
if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
} else
if (m->max_size < JOURNAL_FILE_SIZE_MIN)
else {
if (m->min_size < JOURNAL_FILE_SIZE_MIN)
}
if (fs_size > 0) {
if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
} else
m->keep_free = DEFAULT_KEEP_FREE;
}
log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
format_bytes(a, sizeof(a), m->max_use),
format_bytes(b, sizeof(b), m->max_size),
format_bytes(c, sizeof(c), m->min_size),
format_bytes(d, sizeof(d), m->keep_free));
}
assert(f);
if (from) {
if (f->header->head_entry_realtime == 0)
return -ENOENT;
}
if (to) {
if (f->header->tail_entry_realtime == 0)
return -ENOENT;
}
return 1;
}
/* Determines the monotonic timestamp range covered by this file for one
 * boot (going by the name and parameters; the stores into *from and *to are
 * not visible in this copy).
 *
 * The boot is located via its "_BOOT_ID=" data object. "from" and "to" are
 * both optional; each is only processed when non-NULL.
 *
 * Returns 1 on success, 0 when the boot ID (or a required entry) is not
 * present in this file, and a negative errno on failure.
 *
 * NOTE(review): an unconditional "return 0" follows the lookup, which makes
 * everything after it unreachable as written; its guarding condition, the
 * "from" lookup and the arguments of generic_array_get_plus_one() are
 * missing -- confirm against the upstream revision. */
int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
Object *o;
uint64_t p;
int r;
assert(f);
r = find_data_object_by_boot_id(f, boot_id, &o, &p);
/* Both "error" (< 0) and "boot not found" (0) are handed to the caller */
if (r <= 0)
return r;
return 0;
if (from) {
if (r < 0)
return r;
}
if (to) {
/* Re-map the data object at p before using "o" again */
r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
if (r < 0)
return r;
r = generic_array_get_plus_one(f,
&o, NULL);
if (r <= 0)
return r;
}
return 1;
}
assert(f);
/* If we gained new header fields we gained new features,
* hence suggest a rotation */
return true;
}
/* Let's check if the hash tables grew over a certain fill
* level (75%, borrowing this value from Java's hash table
* implementation), and if so suggest a rotation. To calculate
* the fill level we need the n_data field, which only exists
* in newer versions. */
if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
f->path,
100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
return true;
}
if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
f->path,
100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
return true;
}
/* Are the data objects properly indexed by field objects? */
return true;
if (max_file_usec > 0) {
usec_t t, h;
t = now(CLOCK_REALTIME);
if (h > 0 && t > h + max_file_usec)
return true;
}
return false;
}