journal-file.c revision 7f120cc6a2eeea1b695222ff6e8e83b4f14ace59
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering This file is part of systemd.
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering Copyright 2011 Lennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering systemd is free software; you can redistribute it and/or modify it
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering under the terms of the GNU General Public License as published by
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering the Free Software Foundation; either version 2 of the License, or
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering (at your option) any later version.
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering systemd is distributed in the hope that it will be useful, but
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering WITHOUT ANY WARRANTY; without even the implied warranty of
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering General Public License for more details.
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering You should have received a copy of the GNU General Public License
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering along with systemd; If not, see <http://www.gnu.org/licenses/>.
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*16ULL)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define DEFAULT_FIELD_HASH_TABLE_SIZE (2047ULL*16ULL)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define DEFAULT_WINDOW_SIZE (128ULL*1024ULL*1024ULL)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define COMPRESSION_SIZE_THRESHOLD (64ULL)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering/* This is the minimum journal file size */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering/* These are the lower and upper bounds if we deduce the max_use value
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering * from the file system size */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering/* This is the upper bound if we deduce max_size from max_use */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define DEFAULT_MAX_SIZE_UPPER (16ULL*1024ULL*1024ULL) /* 16 MiB */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering/* This is the upper bound if we deduce the keep_free value from the
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering * file system size */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering/* This is the keep_free value when we can't determine the system size */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poetteringstatic const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering for (t = 0; t < _WINDOW_MAX; t++)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering munmap(f->windows[t].ptr, f->windows[t].size);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poetteringstatic int journal_file_init_header(JournalFile *f, JournalFile *template) {
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering h.arena_offset = htole64(ALIGN64(sizeof(h)));
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering h.seqnum_id = template->header->seqnum_id;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (k != sizeof(h))
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poetteringstatic int journal_file_refresh_header(JournalFile *f) {
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering r = sd_id128_get_machine(&f->header->machine_id);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (sd_id128_equal(boot_id, f->header->boot_id))
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poetteringstatic int journal_file_verify_header(JournalFile *f) {
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if ((le64toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->arena_offset) + le64toh(f->header->arena_size)))
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (!sd_id128_equal(machine_id, f->header->machine_id))
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poetteringstatic int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering /* We assume that this file is not sparse, and we know that
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering * for sure, since we always call posix_fallocate()
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering * ourselves */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (new_size < le64toh(f->header->arena_offset))
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering new_size = le64toh(f->header->arena_offset);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering /* Note that the glibc fallocate() fallback is very
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering inefficient, hence we try to minimize the allocation area. */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (posix_fallocate(f->fd, old_size, new_size - old_size) < 0)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering f->header->arena_size = new_size - htole64(f->header->arena_offset);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering woffset = offset & ~((uint64_t) page_size() - 1ULL);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering /* Avoid SIGBUS on invalid accesses */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering *ret = (uint8_t*) window + (offset - woffset);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poetteringstatic int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (offset + size > (uint64_t) f->last_stat.st_size) {
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering /* Hmm, out of range? Let's refresh the fstat() data
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering * first, before we trust that check. */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering offset + size > (uint64_t) f->last_stat.st_size)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering *ret = (uint8_t*) w->ptr + (offset - w->offset);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering /* If the default window size is larger than what was
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering * asked for extend the mapping a bit in the hope to
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering * minimize needed remappings later on. We add half
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering * the window space before and half behind the
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering * requested mapping */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering delta = (DEFAULT_WINDOW_SIZE - size) / 2;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (offset + size > (uint64_t) f->last_stat.st_size)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering size = (uint64_t) f->last_stat.st_size - offset;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering } else if (o->object.type == OBJECT_FIELD) {
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poetteringint journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (s < sizeof(ObjectHeader))
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (s > sizeof(ObjectHeader)) {
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering r = journal_file_move_to(f, o->object.type, offset, s, &t);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poetteringstatic uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering /* If an external seqnum counter was passed, we update
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering * both the local and the external one, and set it to
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering * the maximum of both */
static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
uint64_t p;
assert(f);
o = (Object*) t;
*ret = o;
*offset = p;
uint64_t s, p;
Object *o;
assert(f);
r = journal_file_append_object(f,
uint64_t s, p;
Object *o;
assert(f);
r = journal_file_append_object(f,
uint64_t s, p;
assert(f);
r = journal_file_move_to(f,
f->data_hash_table = t;
uint64_t s, p;
assert(f);
r = journal_file_move_to(f,
f->field_hash_table = t;
uint64_t p, h;
assert(f);
assert(o);
JournalFile *f,
assert(f);
return -EBADMSG;
Object *o;
goto next;
#ifdef HAVE_XZ
uint64_t l;
return -EBADMSG;
return -EBADMSG;
if (ret)
*ret = o;
if (offset)
*offset = p;
return -EPROTONOSUPPORT;
if (ret)
*ret = o;
if (offset)
*offset = p;
next:
JournalFile *f,
assert(f);
return journal_file_find_data_object_with_hash(f,
static int journal_file_append_data(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) {
Object *o;
bool compressed = false;
assert(f);
if (ret)
*ret = o;
if (offset)
*offset = p;
#ifdef HAVE_XZ
if (f->compress &&
if (compressed) {
f->header->incompatible_flags = htole32(le32toh(f->header->incompatible_flags) | HEADER_INCOMPATIBLE_COMPRESSED);
if (!compressed)
if (ret)
*ret = o;
if (offset)
*offset = p;
assert(o);
assert(o);
uint64_t p) {
Object *o;
assert(f);
assert(p > 0);
n = journal_file_entry_array_n_items(o);
ap = a;
if (hidx > n)
if (ap == 0)
*first = q;
uint64_t p) {
assert(f);
assert(p > 0);
if (*idx == 0)
uint64_t i;
uint64_t p;
assert(f);
assert(o);
return -EINVAL;
return link_entry_into_array_plus_one(f,
offset);
uint64_t n, i;
assert(f);
assert(o);
r = link_entry_into_array(f,
offset);
/* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
f->tail_entry_monotonic_valid = true;
n = journal_file_entry_n_items(o);
static int journal_file_append_entry_internal(
JournalFile *f,
Object *o;
assert(f);
if (ret)
*ret = o;
if (offset)
assert(f);
int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
assert(f);
if (!f->writable)
return -EPERM;
if (!ts) {
if (f->tail_entry_monotonic_valid &&
return -EINVAL;
for (i = 0; i < n_iovec; i++) {
uint64_t p;
Object *o;
uint64_t i,
Object *o;
uint64_t p = 0, a;
assert(f);
a = first;
uint64_t n;
n = journal_file_entry_array_n_items(o);
if (ret)
*ret = o;
if (offset)
*offset = p;
uint64_t i,
Object *o;
assert(f);
if (ret)
*ret = o;
if (offset)
uint64_t n,
bool subtract_one = false;
assert(f);
a = first;
if (right <= 0)
return -EBADMSG;
if (r == TEST_FOUND)
if (r == TEST_RIGHT) {
left = 0;
subtract_one = true;
i = left;
goto found;
return -EBADMSG;
if (r == TEST_FOUND)
if (r == TEST_RIGHT)
right = i;
if (subtract_one && t == 0 && i == 0)
if (subtract_one && i == 0)
p = last_p;
else if (subtract_one)
if (ret)
*ret = o;
if (offset)
*offset = p;
if (idx)
uint64_t n,
assert(f);
else if (r == TEST_FOUND) {
Object *o;
if (ret)
*ret = o;
if (offset)
if (idx)
*idx = 0;
} else if (r == TEST_RIGHT)
(*idx) ++;
Object *o;
assert(f);
assert(p > 0);
return TEST_FOUND;
return TEST_LEFT;
return TEST_RIGHT;
JournalFile *f,
return generic_array_bisect(f,
Object *o;
assert(f);
assert(p > 0);
return TEST_FOUND;
return TEST_LEFT;
return TEST_RIGHT;
JournalFile *f,
return generic_array_bisect(f,
Object *o;
assert(f);
assert(p > 0);
return TEST_FOUND;
return TEST_LEFT;
return TEST_RIGHT;
JournalFile *f,
Object *o;
return -ENOENT;
return generic_array_bisect_plus_one(f,
assert(f);
assert(p > 0);
if (p == needle)
return TEST_FOUND;
else if (p < needle)
return TEST_LEFT;
return TEST_RIGHT;
JournalFile *f,
uint64_t i, n;
assert(f);
assert(p > 0 || !o);
return -EINVAL;
r = generic_array_bisect(f,
return generic_array_get(f,
JournalFile *f,
uint64_t i, n;
assert(f);
assert(o);
assert(p > 0);
return -EINVAL;
r = generic_array_bisect(f,
if (skip < 0) {
return -EBADMSG;
return generic_array_get(f,
JournalFile *f,
uint64_t n, i;
Object *d;
assert(f);
assert(p > 0 || !o);
return -EINVAL;
return generic_array_get_plus_one(f,
JournalFile *f,
Object *d;
return generic_array_bisect_plus_one(f,
JournalFile *f,
Object *d;
return generic_array_bisect_plus_one(f,
Object *o;
uint64_t p;
assert(f);
f->path,
goto fail;
case OBJECT_UNUSED:
case OBJECT_DATA:
case OBJECT_ENTRY:
case OBJECT_FIELD_HASH_TABLE:
case OBJECT_DATA_HASH_TABLE:
case OBJECT_ENTRY_ARRAY:
fail:
int journal_file_open(
const char *fname,
int flags,
JournalFile *f;
bool newly_created = false;
return -EINVAL;
return -ENOMEM;
if (!f->path) {
r = -ENOMEM;
goto fail;
if (f->fd < 0) {
r = -errno;
goto fail;
r = -errno;
goto fail;
newly_created = true;
goto fail;
r = -errno;
goto fail;
r = -EIO;
goto fail;
r = -errno;
goto fail;
if (!newly_created) {
r = journal_file_verify_header(f);
goto fail;
if (f->writable) {
r = journal_file_refresh_header(f);
goto fail;
if (newly_created) {
goto fail;
goto fail;
goto fail;
r = journal_file_map_data_hash_table(f);
goto fail;
if (ret)
*ret = f;
fail:
size_t l;
assert(f);
assert(*f);
old_file = *f;
return -EINVAL;
return -EINVAL;
return -ENOMEM;
free(p);
return -errno;
*f = new_file;
struct vacuum_info {
char *filename;
const struct vacuum_info *a, *b;
a = _a;
b = _b;
DIR *d;
if (max_use <= 0)
return -errno;
size_t q;
goto finish;
if (!de)
r = -ENOMEM;
goto finish;
free(p);
free(p);
struct vacuum_info *j;
free(p);
r = -ENOMEM;
goto finish;
list = j;
n_list ++;
for(i = 0; i < n_list; i++) {
r = -errno;
goto finish;
for (i = 0; i < n_list; i++)
closedir(d);
int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
uint64_t i, n;
assert(o);
assert(p);
return -EPERM;
return -EINVAL;
return -EINVAL;
n = journal_file_entry_n_items(o);
size_t t;
void *data;
Object *u;
return -EBADMSG;
t = (size_t) l;
if ((uint64_t) t != l)
return -E2BIG;
#ifdef HAVE_XZ
if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
return -EBADMSG;
l = rsize;
return -EPROTONOSUPPORT;
assert(m);
if (fs_size > 0) {
if (fs_size > 0) {