journal-file.c revision 7f120cc6a2eeea1b695222ff6e8e83b4f14ace59
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering/***
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering This file is part of systemd.
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering Copyright 2011 Lennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering systemd is free software; you can redistribute it and/or modify it
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering under the terms of the GNU General Public License as published by
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering the Free Software Foundation; either version 2 of the License, or
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering (at your option) any later version.
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering systemd is distributed in the hope that it will be useful, but
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering WITHOUT ANY WARRANTY; without even the implied warranty of
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering General Public License for more details.
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering You should have received a copy of the GNU General Public License
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering along with systemd; If not, see <http://www.gnu.org/licenses/>.
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering***/
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#include <sys/mman.h>
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#include <errno.h>
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#include <sys/uio.h>
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#include <unistd.h>
4871690d9e32608bbd9b18505b5326c2079c9690Allin Cottrell#include <sys/statvfs.h>
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#include <fcntl.h>
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#include <stddef.h>
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#include "journal-def.h"
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#include "journal-file.h"
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#include "lookup3.h"
35e2e347d38cc2f8bd7c38a0d8a5129f5fbb0ab9Lennart Poettering#include "compress.h"
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*16ULL)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define DEFAULT_FIELD_HASH_TABLE_SIZE (2047ULL*16ULL)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define DEFAULT_WINDOW_SIZE (128ULL*1024ULL*1024ULL)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define COMPRESSION_SIZE_THRESHOLD (64ULL)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering/* This is the minimum journal file size */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering/* These are the lower and upper bounds if we deduce the max_use value
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering * from the file system size */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering/* This is the upper bound if we deduce max_size from max_use */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define DEFAULT_MAX_SIZE_UPPER (16ULL*1024ULL*1024ULL) /* 16 MiB */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering/* This is the upper bound if we deduce the keep_free value from the
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering * file system size */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering/* This is the keep_free value when we can't determine the system
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering * size */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poetteringstatic const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poetteringvoid journal_file_close(JournalFile *f) {
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering int t;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering assert(f);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (f->header && f->writable)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering f->header->state = STATE_OFFLINE;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering for (t = 0; t < _WINDOW_MAX; t++)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (f->windows[t].ptr)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering munmap(f->windows[t].ptr, f->windows[t].size);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (f->fd >= 0)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering close_nointr_nofail(f->fd);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering free(f->path);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#ifdef HAVE_XZ
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering free(f->compress_buffer);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering#endif
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering free(f);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering}
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
/* Writes a fresh header to offset 0 of f->fd.
 *
 * The header carries the file signature, an arena offset placed right
 * behind the (8-byte aligned) header and a randomly generated file ID. If
 * a template file is given, its sequence number ID and current sequence
 * number are inherited; otherwise the sequence number ID is set to the new
 * file ID.
 *
 * Returns 0 on success, a negative errno-style value if the ID generation
 * or the write fails, and -EIO on a short write. */
static int journal_file_init_header(JournalFile *f, JournalFile *template) {
        Header hdr;
        ssize_t written;
        int r;

        assert(f);

        zero(hdr);
        memcpy(hdr.signature, signature, 8);
        hdr.arena_offset = htole64(ALIGN64(sizeof(hdr)));

        r = sd_id128_randomize(&hdr.file_id);
        if (r < 0)
                return r;

        if (!template)
                hdr.seqnum_id = hdr.file_id;
        else {
                /* Continue the sequence of the file we are modelled after. */
                hdr.seqnum_id = template->header->seqnum_id;
                hdr.seqnum = template->header->seqnum;
        }

        written = pwrite(f->fd, &hdr, sizeof(hdr), 0);
        if (written < 0)
                return -errno;

        return written == sizeof(hdr) ? 0 : -EIO;
}
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
/* Brings the mapped header up to date for the current machine and boot and
 * marks the file as online.
 *
 * The machine ID is written straight into the header. If the boot ID
 * already stored in the header equals the current one,
 * f->tail_entry_monotonic_valid is set to true; the stored boot ID is then
 * overwritten with the current one in either case.
 *
 * Returns 0 on success or the negative error of the failing ID lookup. */
static int journal_file_refresh_header(JournalFile *f) {
        sd_id128_t current_boot;
        int r;

        assert(f);

        r = sd_id128_get_machine(&f->header->machine_id);
        if (r < 0)
                return r;

        r = sd_id128_get_boot(&current_boot);
        if (r < 0)
                return r;

        /* Compare against the old value before it gets replaced below. */
        if (sd_id128_equal(current_boot, f->header->boot_id))
                f->tail_entry_monotonic_valid = true;

        f->header->boot_id = current_boot;
        f->header->state = STATE_ONLINE;

        /* Full memory barrier after the header stores. */
        __sync_synchronize();

        return 0;
}
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
/* Sanity-checks the mapped header of a journal file.
 *
 * Returns:
 *   0                on success,
 *   -EBADMSG         if the signature does not match,
 *   -EPROTONOSUPPORT if incompatible flags are set that this build does not
 *                    understand,
 *   -ENODATA         if the arena described by the header does not fit into
 *                    the file size recorded in f->last_stat,
 *   -EHOSTDOWN       (writable files only) if the file belongs to a
 *                    different machine,
 *   -ESHUTDOWN       (writable files only) if the file is archived,
 *   or the negative error returned by sd_id128_get_machine().
 *
 * Files that are still marked online, or that carry an unknown state, are
 * only logged about and otherwise accepted. */
static int journal_file_verify_header(JournalFile *f) {
        uint64_t file_size, arena_offset, arena_size;

        assert(f);

        if (memcmp(f->header, signature, 8))
                return -EBADMSG;

#ifdef HAVE_XZ
        if ((le64toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
                return -EPROTONOSUPPORT;
#else
        if (f->header->incompatible_flags != 0)
                return -EPROTONOSUPPORT;
#endif

        file_size = (uint64_t) f->last_stat.st_size;
        arena_offset = le64toh(f->header->arena_offset);
        arena_size = le64toh(f->header->arena_size);

        /* Both values come from the file and cannot be trusted. Compare
         * without adding them up, so that a crafted header cannot make the
         * sum wrap around and slip through the check. */
        if (arena_offset > file_size ||
            arena_size > file_size - arena_offset)
                return -ENODATA;

        if (f->writable) {
                uint32_t state;
                sd_id128_t machine_id;
                int r;

                r = sd_id128_get_machine(&machine_id);
                if (r < 0)
                        return r;

                if (!sd_id128_equal(machine_id, f->header->machine_id))
                        return -EHOSTDOWN;

                state = f->header->state;

                if (state == STATE_ONLINE)
                        log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path);
                else if (state == STATE_ARCHIVED)
                        return -ESHUTDOWN;
                else if (state != STATE_OFFLINE)
                        log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state);
        }

        return 0;
}
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
/* Makes sure the file is large enough to hold `size` bytes at `offset`.
 *
 * The target size is rounded up to a page boundary and never drops below
 * the arena offset. Nothing happens if the file already covers it.
 *
 * Returns:
 *   0       on success (or if no growth was necessary),
 *   -E2BIG  if the new size would exceed metrics.max_size, or if the growth
 *           would eat into the metrics.keep_free reserve of the file
 *           system,
 *   a negative errno-style value if posix_fallocate() or fstat() fails.
 *
 * On success f->last_stat is refreshed and the header's arena_size is
 * updated. */
static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
        uint64_t arena_offset, old_size, new_size;
        int r;

        assert(f);

        /* We assume that this file is not sparse, and we know that
         * for sure, since we always call posix_fallocate()
         * ourselves */

        arena_offset = le64toh(f->header->arena_offset);
        old_size = arena_offset + le64toh(f->header->arena_size);

        new_size = PAGE_ALIGN(offset + size);
        if (new_size < arena_offset)
                new_size = arena_offset;

        if (new_size <= old_size)
                return 0;

        if (f->metrics.max_size > 0 &&
            new_size > f->metrics.max_size)
                return -E2BIG;

        if (new_size > f->metrics.min_size &&
            f->metrics.keep_free > 0) {
                struct statvfs svfs;

                /* Best effort: if the file system cannot be queried the
                 * free space check is skipped. */
                if (fstatvfs(f->fd, &svfs) >= 0) {
                        uint64_t available;

                        /* Widen before multiplying, the statvfs fields may
                         * be narrower than 64 bit. */
                        available = (uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize;

                        if (available >= f->metrics.keep_free)
                                available -= f->metrics.keep_free;
                        else
                                available = 0;

                        if (new_size - old_size > available)
                                return -E2BIG;
                }
        }

        /* Note that the glibc fallocate() fallback is very
           inefficient, hence we try to minimize the allocation area
           as we can. */

        /* posix_fallocate() reports failure through its return value (a
         * positive error number) and does not set errno. */
        r = posix_fallocate(f->fd, old_size, new_size - old_size);
        if (r != 0)
                return -r;

        if (fstat(f->fd, &f->last_stat) < 0)
                return -errno;

        /* Do the arithmetic in host byte order and convert the result. */
        f->header->arena_size = htole64(new_size - arena_offset);

        return 0;
}
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poetteringstatic int journal_file_map(
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering JournalFile *f,
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering uint64_t offset,
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering uint64_t size,
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering void **_window,
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering uint64_t *_woffset,
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering uint64_t *_wsize,
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering void **ret) {
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering uint64_t woffset, wsize;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering void *window;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering assert(f);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering assert(size > 0);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering assert(ret);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering woffset = offset & ~((uint64_t) page_size() - 1ULL);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering wsize = size + (offset - woffset);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering wsize = PAGE_ALIGN(wsize);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering /* Avoid SIGBUS on invalid accesses */
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering return -EADDRNOTAVAIL;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (window == MAP_FAILED)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering return -errno;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (_window)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering *_window = window;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (_woffset)
e88baee88fad8bc59d33b55a7a2d640ef9e16cd6Zbigniew Jędrzejewski-Szmek *_woffset = woffset;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (_wsize)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering *_wsize = wsize;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering *ret = (uint8_t*) window + (offset - woffset);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering return 0;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering}
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
/* Returns in *ret a pointer to `size` bytes of file content at `offset`,
 * using the cached mapping of window `wt` if it already covers the range
 * and replacing that mapping otherwise.
 *
 * Returns 0 on success, -EADDRNOTAVAIL if the range lies outside of the
 * file (also after re-reading the file size), -errno if the old mapping
 * cannot be removed, or the error of journal_file_map(). */
static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
        Window *win;
        void *mapped = NULL;
        uint64_t lead = 0;
        int r;

        assert(f);
        assert(ret);
        assert(wt >= 0);
        assert(wt < _WINDOW_MAX);

        if (offset + size > (uint64_t) f->last_stat.st_size) {
                /* The cached size says we are out of range. The file may
                 * have grown in the meantime, so refresh the fstat() data
                 * before trusting that verdict. */
                if (fstat(f->fd, &f->last_stat) < 0)
                        return -EADDRNOTAVAIL;

                if (offset + size > (uint64_t) f->last_stat.st_size)
                        return -EADDRNOTAVAIL;
        }

        win = &f->windows[wt];

        /* Fast path: the existing mapping already covers the request. */
        if (_likely_(win->ptr &&
                     win->offset <= offset &&
                     win->offset + win->size >= offset + size)) {

                *ret = (uint8_t*) win->ptr + (offset - win->offset);
                return 0;
        }

        /* The old mapping is of no use, get rid of it. */
        if (win->ptr) {
                if (munmap(win->ptr, win->size) < 0)
                        return -errno;

                win->ptr = NULL;
                win->offset = 0;
                win->size = 0;
        }

        if (size < DEFAULT_WINDOW_SIZE) {
                /* The request is smaller than the default window. Grow the
                 * mapping to the default size, centred on the request as
                 * far as the start of the file permits, so that nearby
                 * accesses do not need a remap. */
                lead = (DEFAULT_WINDOW_SIZE - size) / 2;
                if (lead > offset)
                        lead = offset;

                offset -= lead;
                size = DEFAULT_WINDOW_SIZE;
        }

        /* Never map beyond the end of the file. */
        if (offset + size > (uint64_t) f->last_stat.st_size)
                size = (uint64_t) f->last_stat.st_size - offset;

        if (size == 0)
                return -EADDRNOTAVAIL;

        r = journal_file_map(f,
                             offset, size,
                             &win->ptr, &win->offset, &win->size,
                             &mapped);
        if (r < 0)
                return r;

        /* Skip the part of the window that precedes the requested offset. */
        *ret = (uint8_t*) mapped + lead;
        return 0;
}
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
/* Checks the payload hash stored in an object against a freshly computed
 * one.
 *
 * Only uncompressed data objects and field objects are checked; every
 * other object is reported as valid. Returns false if the hashes differ or
 * if the object's size field is too small to hold the fixed part of the
 * object in front of the payload. */
static bool verify_hash(Object *o) {
        uint64_t size, stored, computed;

        assert(o);

        size = le64toh(o->object.size);

        if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
                /* The size comes from the file. Without this check a
                 * truncated object would make the subtraction below wrap
                 * around and hash64() run far beyond the object. */
                if (size < offsetof(Object, data.payload))
                        return false;

                stored = le64toh(o->data.hash);
                computed = hash64(o->data.payload, size - offsetof(Object, data.payload));
        } else if (o->object.type == OBJECT_FIELD) {
                if (size < offsetof(Object, field.payload))
                        return false;

                stored = le64toh(o->field.hash);
                computed = hash64(o->field.payload, size - offsetof(Object, field.payload));
        } else
                return true;

        return stored == computed;
}
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poetteringint journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering int r;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering void *t;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering Object *o;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering uint64_t s;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering assert(f);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering assert(ret);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering assert(type < _OBJECT_TYPE_MAX);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (r < 0)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering return r;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering o = (Object*) t;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering s = le64toh(o->object.size);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (s < sizeof(ObjectHeader))
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering return -EBADMSG;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (type >= 0 && o->object.type != type)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering return -EBADMSG;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (s > sizeof(ObjectHeader)) {
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering r = journal_file_move_to(f, o->object.type, offset, s, &t);
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (r < 0)
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering return r;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering o = (Object*) t;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering }
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering if (!verify_hash(o))
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering return -EBADMSG;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering *ret = o;
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering return 0;
b2e6df73aa508cc09b1b536a2fb9f90f152b89faZbigniew Jędrzejewski-Szmek}
ef63833d532dd86bdba63211e6a1363cbb3ef61dLennart Poettering
/* Allocates the next sequence number for this file and stores it in the
 * header.
 *
 * The candidate is the header's current sequence number plus one. If an
 * external counter is passed in `seqnum`, the larger of that candidate and
 * the external counter plus one is used, and the external counter is
 * updated to the result. The header's first_seqnum is set as well if it
 * is still zero.
 *
 * Returns the newly assigned sequence number in host byte order. */
static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
        uint64_t next;

        assert(f);

        next = le64toh(f->header->seqnum) + 1;

        if (seqnum) {
                uint64_t external = *seqnum + 1;

                /* Keep the local and the external counter in lockstep by
                 * moving both to the maximum of the two. */
                if (external > next)
                        next = external;

                *seqnum = next;
        }

        f->header->seqnum = htole64(next);

        /* Zero is zero in any byte order, no conversion needed. */
        if (f->header->first_seqnum == 0)
                f->header->first_seqnum = htole64(next);

        return next;
}
/* Appends a new object of the given type and size behind the current
 * tail object of f (or at the start of the arena if there is none).
 *
 * The object header is zeroed and its type and size are filled in; the
 * file header's tail_object_offset and n_objects are updated.
 *
 * On success returns 0, stores the mapped object in *ret and its file
 * offset in *offset. On failure returns the negative error of the
 * helper that failed. */
static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
        uint64_t where;
        Object *obj;
        void *mapped;
        int r;

        assert(f);
        assert(size >= sizeof(ObjectHeader));
        assert(offset);
        assert(ret);

        where = le64toh(f->header->tail_object_offset);
        if (where != 0) {
                Object *last;

                /* Place the new object right behind the current tail */
                r = journal_file_move_to_object(f, -1, where, &last);
                if (r < 0)
                        return r;

                where += ALIGN64(le64toh(last->object.size));
        } else
                /* No object yet, start at the beginning of the arena */
                where = le64toh(f->header->arena_offset);

        r = journal_file_allocate(f, where, size);
        if (r < 0)
                return r;

        r = journal_file_move_to(f, type, where, size, &mapped);
        if (r < 0)
                return r;

        obj = (Object*) mapped;

        zero(obj->object);
        obj->object.type = type;
        obj->object.size = htole64(size);

        f->header->tail_object_offset = htole64(where);
        f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);

        *ret = obj;
        *offset = where;

        return 0;
}
/* Creates the data hash table object of a file: appends an
 * OBJECT_DATA_HASH_TABLE object with DEFAULT_DATA_HASH_TABLE_SIZE bytes
 * of items, clears the items and records the item area's offset and
 * size in the file header. Returns 0 or a negative error. */
static int journal_file_setup_data_hash_table(JournalFile *f) {
        uint64_t table_size, where;
        Object *table;
        int r;

        assert(f);

        table_size = DEFAULT_DATA_HASH_TABLE_SIZE;

        r = journal_file_append_object(
                        f,
                        OBJECT_DATA_HASH_TABLE,
                        offsetof(Object, hash_table.items) + table_size,
                        &table, &where);
        if (r < 0)
                return r;

        memset(table->hash_table.items, 0, table_size);

        /* The header points at the items, not at the object header */
        f->header->data_hash_table_offset = htole64(where + offsetof(Object, hash_table.items));
        f->header->data_hash_table_size = htole64(table_size);

        return 0;
}
/* Creates the field hash table object of a file: appends an
 * OBJECT_FIELD_HASH_TABLE object with DEFAULT_FIELD_HASH_TABLE_SIZE
 * bytes of items, clears the items and records the item area's offset
 * and size in the file header. Returns 0 or a negative error. */
static int journal_file_setup_field_hash_table(JournalFile *f) {
        uint64_t table_size, where;
        Object *table;
        int r;

        assert(f);

        table_size = DEFAULT_FIELD_HASH_TABLE_SIZE;

        r = journal_file_append_object(
                        f,
                        OBJECT_FIELD_HASH_TABLE,
                        offsetof(Object, hash_table.items) + table_size,
                        &table, &where);
        if (r < 0)
                return r;

        memset(table->hash_table.items, 0, table_size);

        /* The header points at the items, not at the object header */
        f->header->field_hash_table_offset = htole64(where + offsetof(Object, hash_table.items));
        f->header->field_hash_table_size = htole64(table_size);

        return 0;
}
/* Maps the data hash table whose offset and size are recorded in the
 * file header into the WINDOW_DATA_HASH_TABLE window and remembers the
 * mapping in f->data_hash_table. Returns 0 or a negative error. */
static int journal_file_map_data_hash_table(JournalFile *f) {
        uint64_t table_offset, table_size;
        void *mapped;
        int r;

        assert(f);

        table_offset = le64toh(f->header->data_hash_table_offset);
        table_size = le64toh(f->header->data_hash_table_size);

        r = journal_file_move_to(f, WINDOW_DATA_HASH_TABLE, table_offset, table_size, &mapped);
        if (r < 0)
                return r;

        f->data_hash_table = mapped;
        return 0;
}
/* Maps the field hash table whose offset and size are recorded in the
 * file header into the WINDOW_FIELD_HASH_TABLE window and remembers the
 * mapping in f->field_hash_table. Returns 0 or a negative error. */
static int journal_file_map_field_hash_table(JournalFile *f) {
        uint64_t table_offset, table_size;
        void *mapped;
        int r;

        assert(f);

        table_offset = le64toh(f->header->field_hash_table_offset);
        table_size = le64toh(f->header->field_hash_table_size);

        r = journal_file_move_to(f, WINDOW_FIELD_HASH_TABLE, table_offset, table_size, &mapped);
        if (r < 0)
                return r;

        f->field_hash_table = mapped;
        return 0;
}
/* Links the freshly appended data object o (located at file offset
 * 'offset') into the data hash table bucket selected by 'hash'.
 *
 * The object's chain and entry bookkeeping fields are reset, then the
 * object is appended at the END of the bucket's chain: the previous
 * tail object gets its next_hash_offset pointed at the new object and
 * the bucket's tail_hash_offset is updated. If the bucket is empty the
 * new object also becomes the head.
 *
 * Returns 0 on success or a negative error if moving to an object
 * fails. */
static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
        uint64_t p, h;
        int r;

        assert(f);
        assert(o);
        assert(offset > 0);
        assert(o->object.type == OBJECT_DATA);

        o->data.next_hash_offset = o->data.next_field_offset = 0;
        o->data.entry_offset = o->data.entry_array_offset = 0;
        o->data.n_entries = 0;

        h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));

        /* The predecessor of the new object is the current tail of the
         * chain. Patching the head instead would cut every object
         * between head and tail out of the chain as soon as a bucket
         * holds more than two objects. */
        p = le64toh(f->data_hash_table[h].tail_hash_offset);
        if (p == 0) {
                /* Only entry in the hash table is easy */
                f->data_hash_table[h].head_hash_offset = htole64(offset);
        } else {
                /* Temporarily move back to the previous data object,
                 * to patch in pointer */

                r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
                if (r < 0)
                        return r;

                o->data.next_hash_offset = htole64(offset);

                r = journal_file_move_to_object(f, OBJECT_DATA, offset, &o);
                if (r < 0)
                        return r;
        }

        f->data_hash_table[h].tail_hash_offset = htole64(offset);

        return 0;
}
/* Looks up a data object with the given payload and precomputed hash.
 *
 * Walks the hash chain of the bucket selected by 'hash' and compares
 * each candidate with a matching hash against data/size. Compressed
 * candidates are uncompressed into f->compress_buffer first (only if
 * built with HAVE_XZ).
 *
 * Returns 1 if found (*ret and *offset are filled in if non-NULL),
 * 0 if no such object exists, -EBADMSG if the hash table or an object
 * looks corrupt, -EPROTONOSUPPORT for compressed objects in builds
 * without XZ, or another negative error from moving to an object. */
int journal_file_find_data_object_with_hash(
                JournalFile *f,
                const void *data, uint64_t size, uint64_t hash,
                Object **ret, uint64_t *offset) {

        uint64_t p, osize, h, n_buckets;
        int r;

        assert(f);
        assert(data || size == 0);

        osize = offsetof(Object, data.payload) + size;

        /* A table smaller than a single item (including an empty one)
         * has no buckets; refuse it rather than computing a modulo by
         * zero below. */
        n_buckets = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
        if (n_buckets == 0)
                return -EBADMSG;

        h = hash % n_buckets;
        p = le64toh(f->data_hash_table[h].head_hash_offset);

        while (p > 0) {
                Object *o;

                r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
                if (r < 0)
                        return r;

                if (le64toh(o->data.hash) != hash)
                        goto next;

                if (o->object.flags & OBJECT_COMPRESSED) {
#ifdef HAVE_XZ
                        /* uint64_t, like the other callers of the blob
                         * helpers in this file */
                        uint64_t l, rsize;

                        l = le64toh(o->object.size);
                        if (l <= offsetof(Object, data.payload))
                                return -EBADMSG;

                        l -= offsetof(Object, data.payload);

                        if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
                                return -EBADMSG;

                        if (rsize == size &&
                            memcmp(f->compress_buffer, data, size) == 0) {

                                if (ret)
                                        *ret = o;

                                if (offset)
                                        *offset = p;

                                return 1;
                        }
#else
                        return -EPROTONOSUPPORT;
#endif

                } else if (le64toh(o->object.size) == osize &&
                           memcmp(o->data.payload, data, size) == 0) {

                        if (ret)
                                *ret = o;

                        if (offset)
                                *offset = p;

                        return 1;
                }

        next:
                p = le64toh(o->data.next_hash_offset);
        }

        return 0;
}
/* Convenience wrapper around
 * journal_file_find_data_object_with_hash(): hashes the payload with
 * hash64() and performs the lookup. Return values are those of the
 * wrapped function. */
int journal_file_find_data_object(
                JournalFile *f,
                const void *data, uint64_t size,
                Object **ret, uint64_t *offset) {

        assert(f);
        assert(data || size == 0);

        return journal_file_find_data_object_with_hash(
                        f,
                        data, size, hash64(data, size),
                        ret, offset);
}
/* Makes sure a data object with the given payload exists in f.
 *
 * If an identical object is already present it is reused; otherwise a
 * new OBJECT_DATA is appended, filled (compressed when the file has
 * compression enabled, the payload is at least
 * COMPRESSION_SIZE_THRESHOLD bytes and compression succeeds) and linked
 * into the data hash table.
 *
 * Returns 0 on success and stores the object and its offset in *ret
 * and *offset if those are non-NULL; returns a negative error
 * otherwise. */
static int journal_file_append_data(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) {
        uint64_t digest, where;
        Object *obj;
        int r;

        assert(f);
        assert(data || size == 0);

        digest = hash64(data, size);

        r = journal_file_find_data_object_with_hash(f, data, size, digest, &obj, &where);
        if (r < 0)
                return r;

        if (r == 0) {
                /* Not stored yet, write a new object */
                bool compressed = false;
                uint64_t total;

                total = offsetof(Object, data.payload) + size;
                r = journal_file_append_object(f, OBJECT_DATA, total, &obj, &where);
                if (r < 0)
                        return r;

                obj->data.hash = htole64(digest);

#ifdef HAVE_XZ
                if (f->compress &&
                    size >= COMPRESSION_SIZE_THRESHOLD) {
                        uint64_t rsize;

                        compressed = compress_blob(data, size, obj->data.payload, &rsize);

                        if (compressed) {
                                /* Shrink the object to the compressed
                                 * size and flag file and object */
                                obj->object.size = htole64(offsetof(Object, data.payload) + rsize);
                                obj->object.flags |= OBJECT_COMPRESSED;

                                f->header->incompatible_flags = htole32(le32toh(f->header->incompatible_flags) | HEADER_INCOMPATIBLE_COMPRESSED);

                                log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
                        }
                }
#endif

                if (!compressed)
                        memcpy(obj->data.payload, data, size);

                r = journal_file_link_data(f, obj, where, digest);
                if (r < 0)
                        return r;
        }

        if (ret)
                *ret = obj;

        if (offset)
                *offset = where;

        return 0;
}
/* Returns the number of EntryItem slots of an entry object, derived
 * from the object size stored in its header. */
uint64_t journal_file_entry_n_items(Object *o) {
        assert(o);

        /* The type is compared as stored, like everywhere else in this
         * file; wrapping the constant in htole64() made this assertion
         * fail on big-endian hosts. */
        assert(o->object.type == OBJECT_ENTRY);

        return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
}
/* Returns the number of offset slots of an entry array object, derived
 * from the object size stored in its header. */
static uint64_t journal_file_entry_array_n_items(Object *o) {
        assert(o);

        /* The type is compared as stored, like everywhere else in this
         * file; wrapping the constant in htole64() made this assertion
         * fail on big-endian hosts. */
        assert(o->object.type == OBJECT_ENTRY_ARRAY);

        return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
}
/* Stores entry offset p at position *idx of a chained entry array and
 * increments *idx.
 *
 * f:     the journal file
 * first: little-endian offset of the first entry array object of the
 *        chain (0 if there is none yet); updated when the first array
 *        has to be created
 * idx:   little-endian number of entries already stored in the chain
 * p:     offset of the entry to store (host byte order)
 *
 * If the chain has no free slot at that position a new array object is
 * appended (roughly doubling the capacity, at least 4 slots) and linked
 * to the end of the chain.
 *
 * Returns 0 on success, a negative error otherwise. */
static int link_entry_into_array(JournalFile *f,
                                 uint64_t *first,
                                 uint64_t *idx,
                                 uint64_t p) {
        int r;
        uint64_t n = 0, ap = 0, q, i, a, hidx;
        Object *o;

        assert(f);
        assert(first);
        assert(idx);
        assert(p > 0);

        a = le64toh(*first);
        i = hidx = le64toh(*idx);

        /* Walk the chain; i becomes the index relative to the current
         * array, ap remembers the last array visited */
        while (a > 0) {

                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
                if (r < 0)
                        return r;

                n = journal_file_entry_array_n_items(o);
                if (i < n) {
                        o->entry_array.items[i] = htole64(p);
                        *idx = htole64(hidx + 1);
                        return 0;
                }

                i -= n;
                ap = a;
                a = le64toh(o->entry_array.next_entry_array_offset);
        }

        /* No room left, allocate a bigger array */
        if (hidx > n)
                n = (hidx+1) * 2;
        else
                n = n * 2;

        if (n < 4)
                n = 4;

        r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
                                       offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
                                       &o, &q);
        if (r < 0)
                return r;

        o->entry_array.items[i] = htole64(p);

        if (ap == 0)
                /* *first is read with le64toh() above, hence it has to
                 * be stored in little-endian as well */
                *first = htole64(q);
        else {
                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
                if (r < 0)
                        return r;

                o->entry_array.next_entry_array_offset = htole64(q);
        }

        *idx = htole64(hidx + 1);

        return 0;
}
/* Like link_entry_into_array(), but for lists that keep their very
 * first element inline: element 0 lives in *extra, elements 1..n-1 in
 * the chained entry array starting at *first.
 *
 * extra, first and idx point to little-endian fields; p is the entry
 * offset in host byte order. *idx is the total number of elements and
 * is incremented on success.
 *
 * Returns 0 on success, a negative error otherwise. */
static int link_entry_into_array_plus_one(JournalFile *f,
                                          uint64_t *extra,
                                          uint64_t *first,
                                          uint64_t *idx,
                                          uint64_t p) {

        int r;

        assert(f);
        assert(extra);
        assert(first);
        assert(idx);
        assert(p > 0);

        if (*idx == 0)
                *extra = htole64(p);
        else {
                uint64_t i;

                /* link_entry_into_array() expects its index in
                 * little-endian, so convert the array-relative index
                 * back before handing it over */
                i = htole64(le64toh(*idx) - 1);
                r = link_entry_into_array(f, first, &i, p);
                if (r < 0)
                        return r;
        }

        *idx = htole64(le64toh(*idx) + 1);
        return 0;
}
/* Registers the entry at 'offset' with the data object referenced by
 * item i of entry object o, i.e. adds the entry to that data object's
 * list of entries. Returns -EINVAL if the item has no object offset,
 * another negative error on failure, 0 on success. */
static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
        uint64_t data_offset;
        Object *d;
        int r;

        assert(f);
        assert(o);
        assert(offset > 0);

        data_offset = le64toh(o->entry.items[i].object_offset);
        if (data_offset == 0)
                return -EINVAL;

        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
        if (r < 0)
                return r;

        return link_entry_into_array_plus_one(
                        f,
                        &d->data.entry_offset,
                        &d->data.entry_array_offset,
                        &d->data.n_entries,
                        offset);
}
/* Hooks a completely written entry object o (at file offset 'offset')
 * into the file: adds it to the global entry array, updates the
 * head/tail timestamps in the header and links it to each data object
 * it references. Returns 0 or a negative error. */
static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
        uint64_t n_items, k;
        int r;

        assert(f);
        assert(o);
        assert(offset > 0);
        assert(o->object.type == OBJECT_ENTRY);

        __sync_synchronize();

        /* First, the entry itself goes into the global array */
        r = link_entry_into_array(f,
                                  &f->header->entry_array_offset,
                                  &f->header->n_entries,
                                  offset);
        if (r < 0)
                return r;

        /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */

        if (f->header->head_entry_realtime == 0)
                f->header->head_entry_realtime = o->entry.realtime;

        f->header->tail_entry_realtime = o->entry.realtime;
        f->header->tail_entry_monotonic = o->entry.monotonic;

        f->tail_entry_monotonic_valid = true;

        /* Then, every item of the entry */
        n_items = journal_file_entry_n_items(o);
        for (k = 0; k < n_items; k++) {
                r = journal_file_link_entry_item(f, o, offset, k);
                if (r < 0)
                        return r;
        }

        return 0;
}
/* Writes an entry object made up of the given, already prepared items
 * and links it into the file.
 *
 * ts supplies the realtime and monotonic timestamps, xor_hash the
 * combined hash of the items, seqnum an optional external sequence
 * counter (see journal_file_seqnum()). The boot ID is copied from the
 * file header.
 *
 * Returns 0 on success (and fills *ret / *offset if non-NULL) or a
 * negative error. */
static int journal_file_append_entry_internal(
                JournalFile *f,
                const dual_timestamp *ts,
                uint64_t xor_hash,
                const EntryItem items[], unsigned n_items,
                uint64_t *seqnum,
                Object **ret, uint64_t *offset) {

        uint64_t where, items_size;
        Object *entry;
        int r;

        assert(f);
        assert(items || n_items == 0);
        assert(ts);

        items_size = n_items * sizeof(EntryItem);

        r = journal_file_append_object(f, OBJECT_ENTRY, offsetof(Object, entry.items) + items_size, &entry, &where);
        if (r < 0)
                return r;

        entry->entry.seqnum = htole64(journal_file_seqnum(f, seqnum));
        memcpy(entry->entry.items, items, n_items * sizeof(EntryItem));
        entry->entry.realtime = htole64(ts->realtime);
        entry->entry.monotonic = htole64(ts->monotonic);
        entry->entry.xor_hash = htole64(xor_hash);
        entry->entry.boot_id = f->header->boot_id;

        r = journal_file_link_entry(f, entry, where);
        if (r < 0)
                return r;

        if (ret)
                *ret = entry;

        if (offset)
                *offset = where;

        return 0;
}
/* Notifies inotify watchers that the journal file was modified. */
void journal_file_post_change(JournalFile *f) {
        assert(f);

        /* inotify() does not receive IN_MODIFY events from file
         * accesses done via mmap(). After each access we hence
         * trigger IN_MODIFY by truncating the journal file to its
         * current size. */

        __sync_synchronize();

        if (ftruncate(f->fd, f->last_stat.st_size) < 0)
                log_error("Failed to truncate file to its own size: %m");
}
/* Appends a new entry consisting of the fields in iovec[] to f.
 *
 * ts:     timestamps of the entry, or NULL to use the current time
 * seqnum: optional external sequence number counter
 * ret, offset: optionally receive the new entry object and its offset
 *
 * Each field is stored as (or resolved to an existing) data object,
 * then the entry object referencing them is written and watchers are
 * notified.
 *
 * Returns 0 on success, -EPERM if the file is not writable, -EINVAL if
 * the timestamps are older than those of the last entry in the file,
 * or another negative error. */
int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
        unsigned i;
        EntryItem *items;
        int r;
        uint64_t xor_hash = 0;
        struct dual_timestamp _ts;

        assert(f);
        assert(iovec || n_iovec == 0);

        if (!f->writable)
                return -EPERM;

        if (!ts) {
                dual_timestamp_get(&_ts);
                ts = &_ts;
        }

        if (f->tail_entry_monotonic_valid &&
            ts->monotonic < le64toh(f->header->tail_entry_monotonic))
                return -EINVAL;

        /* Entries have to be ordered by realtime as well, since lookups
         * by realtime bisect the entry array. This is the same check
         * journal_file_copy_entry() applies. */
        if (ts->realtime < le64toh(f->header->tail_entry_realtime))
                return -EINVAL;

        items = alloca(sizeof(EntryItem) * n_iovec);

        for (i = 0; i < n_iovec; i++) {
                uint64_t p;
                Object *o;

                r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
                if (r < 0)
                        return r;

                xor_hash ^= le64toh(o->data.hash);
                items[i].object_offset = htole64(p);
                items[i].hash = o->data.hash;
        }

        r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);

        journal_file_post_change(f);

        return r;
}
/* Fetches the i-th entry referenced by the chained entry array that
 * starts at offset 'first'.
 *
 * Returns 1 and fills *ret / *offset (if non-NULL) when the entry
 * exists, 0 when the chain ends before index i or the slot is empty,
 * and a negative error if an object cannot be accessed. */
static int generic_array_get(JournalFile *f,
                             uint64_t first,
                             uint64_t i,
                             Object **ret, uint64_t *offset) {

        uint64_t array_offset, entry_offset = 0;
        Object *o;
        int r;

        assert(f);

        for (array_offset = first; array_offset > 0;) {
                uint64_t capacity;

                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, array_offset, &o);
                if (r < 0)
                        return r;

                capacity = journal_file_entry_array_n_items(o);
                if (i < capacity) {
                        /* The index falls into this array */
                        entry_offset = le64toh(o->entry_array.items[i]);
                        break;
                }

                /* Skip this array and continue with the next one */
                i -= capacity;
                array_offset = le64toh(o->entry_array.next_entry_array_offset);
        }

        if (array_offset == 0 || entry_offset == 0)
                return 0;

        r = journal_file_move_to_object(f, OBJECT_ENTRY, entry_offset, &o);
        if (r < 0)
                return r;

        if (ret)
                *ret = o;

        if (offset)
                *offset = entry_offset;

        return 1;
}
/* Variant of generic_array_get() for lists whose first element is
 * stored inline: index 0 refers to the entry at offset 'extra', index
 * i > 0 to element i-1 of the chained array starting at 'first'.
 * Return values are the same as for generic_array_get(). */
static int generic_array_get_plus_one(JournalFile *f,
                                      uint64_t extra,
                                      uint64_t first,
                                      uint64_t i,
                                      Object **ret, uint64_t *offset) {

        Object *o;
        int r;

        assert(f);

        if (i != 0)
                return generic_array_get(f, first, i-1, ret, offset);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
        if (r < 0)
                return r;

        if (ret)
                *ret = o;

        if (offset)
                *offset = extra;

        return 1;
}
/* Results reported by the test_object callbacks used for bisection,
 * describing how the probed entry relates to the needle. */
enum {
/* The probed entry matches the needle */
TEST_FOUND,
/* The probed entry is smaller than the needle */
TEST_LEFT,
/* The probed entry is bigger than the needle */
TEST_RIGHT
};
/* Searches the chained entry array starting at offset 'first', which
 * holds n entries in total, for 'needle'.
 *
 * test_object() is called with the offset of a candidate entry and the
 * needle and reports TEST_FOUND, TEST_LEFT or TEST_RIGHT (or a negative
 * error). 'direction' selects which entry is returned at the point the
 * bisection converges on: that entry for DIRECTION_DOWN, the one before
 * it for DIRECTION_UP.
 *
 * Returns 1 if an entry was located (filling *ret, *offset and *idx if
 * they are non-NULL; *idx is the index within the whole chain), 0 if
 * there is none, and a negative error on failure. */
static int generic_array_bisect(JournalFile *f,
uint64_t first,
uint64_t n,
uint64_t needle,
int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
direction_t direction,
Object **ret,
uint64_t *offset,
uint64_t *idx) {
/* t counts the entries of the arrays already skipped, last_p is the
 * last entry of the previously visited array */
uint64_t a, p, t = 0, i = 0, last_p = 0;
bool subtract_one = false;
Object *o, *array = NULL;
int r;
assert(f);
assert(test_object);
a = first;
while (a > 0) {
uint64_t left, right, k, lp;
r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
if (r < 0)
return r;
/* Only the first n slots of the chain are in use, hence limit the
 * range looked at in this array accordingly */
k = journal_file_entry_array_n_items(array);
right = MIN(k, n);
if (right <= 0)
return 0;
/* Probe the last used slot of this array first, to find out whether
 * the position we look for lies within this array at all */
i = right - 1;
lp = p = le64toh(array->entry_array.items[i]);
if (p <= 0)
return -EBADMSG;
r = test_object(f, p, needle);
if (r < 0)
return r;
/* An exact match is treated as TEST_RIGHT when searching downwards and
 * as TEST_LEFT when searching upwards */
if (r == TEST_FOUND)
r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
if (r == TEST_RIGHT) {
/* The position is in this array, bisect [left, right] */
left = 0;
right -= 1;
for (;;) {
if (left == right) {
/* Converged. When searching upwards the entry before this position is
 * the one to return */
if (direction == DIRECTION_UP)
subtract_one = true;
i = left;
goto found;
}
assert(left < right);
i = (left + right) / 2;
p = le64toh(array->entry_array.items[i]);
if (p <= 0)
return -EBADMSG;
r = test_object(f, p, needle);
if (r < 0)
return r;
if (r == TEST_FOUND)
r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
if (r == TEST_RIGHT)
right = i;
else
left = i + 1;
}
}
/* This array has more slots than there are entries left, so there are
 * no further entries in later arrays: nothing found */
if (k > n)
return 0;
/* Go on with the next array of the chain */
last_p = lp;
n -= k;
t += k;
a = le64toh(array->entry_array.next_entry_array_offset);
}
return 0;
found:
/* There is no entry before the very first one */
if (subtract_one && t == 0 && i == 0)
return 0;
/* The entry before the first slot of this array is the last entry of
 * the previous array */
if (subtract_one && i == 0)
p = last_p;
else if (subtract_one)
p = le64toh(array->entry_array.items[i-1]);
else
p = le64toh(array->entry_array.items[i]);
r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
if (r < 0)
return r;
if (ret)
*ret = o;
if (offset)
*offset = p;
if (idx)
*idx = t + i - (subtract_one ? 1 : 0);
return 1;
}
/* Variant of generic_array_bisect() for lists whose first element is
 * stored inline: 'extra' is the offset of element 0, 'first' the
 * offset of the chained array holding the remaining n-1 elements.
 *
 * Returns 1 if an entry was located (filling *ret, *offset and *idx if
 * they are non-NULL; *idx counts the inline element as index 0), 0 if
 * there is none, and a negative error on failure. */
static int generic_array_bisect_plus_one(JournalFile *f,
                                         uint64_t extra,
                                         uint64_t first,
                                         uint64_t n,
                                         uint64_t needle,
                                         int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
                                         direction_t direction,
                                         Object **ret,
                                         uint64_t *offset,
                                         uint64_t *idx) {

        int r;

        assert(f);
        assert(test_object);

        if (n <= 0)
                return 0;

        /* This bisects the array in object 'first', but first checks
         * an extra */
        r = test_object(f, extra, needle);
        if (r < 0)
                return r;
        else if (r == TEST_FOUND) {
                Object *o;

                r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
                if (r < 0)
                        return r;

                if (ret)
                        *ret = o;

                if (offset)
                        *offset = extra;

                if (idx)
                        *idx = 0;

                return 1;
        } else if (r == TEST_RIGHT)
                return 0;

        r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);

        /* Account for the inline element. idx is optional and several
         * callers pass NULL, so it must not be dereferenced blindly. */
        if (r > 0 && idx)
                (*idx) ++;

        return r;
}
/* Bisection callback: compares the sequence number of the entry at
 * offset p with 'needle'. Returns TEST_FOUND, TEST_LEFT (entry is
 * smaller), TEST_RIGHT (entry is bigger) or a negative error. */
static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
        uint64_t have;
        Object *o;
        int r;

        assert(f);
        assert(p > 0);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
        if (r < 0)
                return r;

        have = le64toh(o->entry.seqnum);

        if (have < needle)
                return TEST_LEFT;
        if (have > needle)
                return TEST_RIGHT;

        return TEST_FOUND;
}
/* Seeks to an entry by sequence number by bisecting the file's global
 * entry array. Return values are those of generic_array_bisect(). */
int journal_file_move_to_entry_by_seqnum(
                JournalFile *f,
                uint64_t seqnum,
                direction_t direction,
                Object **ret,
                uint64_t *offset) {

        uint64_t array_offset, n_entries;

        array_offset = le64toh(f->header->entry_array_offset);
        n_entries = le64toh(f->header->n_entries);

        return generic_array_bisect(f, array_offset, n_entries,
                                    seqnum, test_object_seqnum, direction,
                                    ret, offset, NULL);
}
/* Bisection callback: compares the realtime timestamp of the entry at
 * offset p with 'needle'. Returns TEST_FOUND, TEST_LEFT (entry is
 * smaller), TEST_RIGHT (entry is bigger) or a negative error. */
static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
        uint64_t have;
        Object *o;
        int r;

        assert(f);
        assert(p > 0);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
        if (r < 0)
                return r;

        have = le64toh(o->entry.realtime);

        if (have < needle)
                return TEST_LEFT;
        if (have > needle)
                return TEST_RIGHT;

        return TEST_FOUND;
}
/* Seeks to an entry by realtime timestamp by bisecting the file's
 * global entry array. Return values are those of
 * generic_array_bisect(). */
int journal_file_move_to_entry_by_realtime(
                JournalFile *f,
                uint64_t realtime,
                direction_t direction,
                Object **ret,
                uint64_t *offset) {

        uint64_t array_offset, n_entries;

        array_offset = le64toh(f->header->entry_array_offset);
        n_entries = le64toh(f->header->n_entries);

        return generic_array_bisect(f, array_offset, n_entries,
                                    realtime, test_object_realtime, direction,
                                    ret, offset, NULL);
}
/* Bisection callback: compares the monotonic timestamp of the entry at
 * offset p with 'needle'. Returns TEST_FOUND, TEST_LEFT (entry is
 * smaller), TEST_RIGHT (entry is bigger) or a negative error. */
static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
        uint64_t have;
        Object *o;
        int r;

        assert(f);
        assert(p > 0);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
        if (r < 0)
                return r;

        have = le64toh(o->entry.monotonic);

        if (have < needle)
                return TEST_LEFT;
        if (have > needle)
                return TEST_RIGHT;

        return TEST_FOUND;
}
/* Seeks to an entry by monotonic timestamp within the boot identified
 * by boot_id.
 *
 * The "_BOOT_ID=<id>" data object is looked up and the list of entries
 * referencing it is bisected by monotonic timestamp.
 *
 * Returns 1 if an entry was located, 0 if not, -ENOENT if the file has
 * no data object for that boot ID, or another negative error. */
int journal_file_move_to_entry_by_monotonic(
                JournalFile *f,
                sd_id128_t boot_id,
                uint64_t monotonic,
                direction_t direction,
                Object **ret,
                uint64_t *offset) {

        /* 9 characters of prefix, 32 characters of ID, trailing NUL */
        char t[9+32+1] = "_BOOT_ID=";
        Object *o;
        int r;

        /* Format the ID right behind the '='; writing it at t + 8
         * would overwrite the '=' and the lookup below could never
         * match a "_BOOT_ID=..." field. */
        sd_id128_to_string(boot_id, t + 9);

        r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
        if (r < 0)
                return r;
        else if (r == 0)
                return -ENOENT;

        return generic_array_bisect_plus_one(f,
                                             le64toh(o->data.entry_offset),
                                             le64toh(o->data.entry_array_offset),
                                             le64toh(o->data.n_entries),
                                             monotonic,
                                             test_object_monotonic,
                                             direction,
                                             ret, offset, NULL);
}
/* Bisection callback: compares the entry offset p itself with
 * 'needle'. Returns TEST_FOUND, TEST_LEFT (p is smaller) or TEST_RIGHT
 * (p is bigger). */
static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
        assert(f);
        assert(p > 0);

        if (p < needle)
                return TEST_LEFT;
        if (p > needle)
                return TEST_RIGHT;

        return TEST_FOUND;
}
/* Moves to the entry following (DIRECTION_DOWN) or preceding
 * (DIRECTION_UP) the entry o located at offset p. If o is NULL the
 * first respectively last entry of the file is returned.
 *
 * Returns 1 and fills *ret / *offset (if non-NULL) on success, 0 if
 * there is no such entry, -EINVAL if o is not an entry object, or
 * another negative error. */
int journal_file_next_entry(
                JournalFile *f,
                Object *o, uint64_t p,
                direction_t direction,
                Object **ret, uint64_t *offset) {

        uint64_t pos, total;
        int r;

        assert(f);
        assert(p > 0 || !o);

        total = le64toh(f->header->n_entries);
        if (total == 0)
                return 0;

        if (o) {
                if (o->object.type != OBJECT_ENTRY)
                        return -EINVAL;

                /* Find the index of the current entry first */
                r = generic_array_bisect(f,
                                         le64toh(f->header->entry_array_offset),
                                         le64toh(f->header->n_entries),
                                         p,
                                         test_object_offset,
                                         DIRECTION_DOWN,
                                         NULL, NULL,
                                         &pos);
                if (r <= 0)
                        return r;

                if (direction == DIRECTION_DOWN) {
                        /* Already at the last entry? */
                        if (pos >= total - 1)
                                return 0;

                        pos++;
                } else {
                        /* Already at the first entry? */
                        if (pos == 0)
                                return 0;

                        pos--;
                }
        } else if (direction == DIRECTION_DOWN)
                pos = 0;
        else
                pos = total - 1;

        /* And jump to it */
        return generic_array_get(f,
                                 le64toh(f->header->entry_array_offset),
                                 pos,
                                 ret, offset);
}
/* Moves 'skip' entries forward (positive) or backward (negative) from
 * the entry o located at offset p. The resulting position is clamped
 * to the first respectively last entry of the file.
 *
 * Returns 1 and fills *ret / *offset (if non-NULL) on success, 0 if
 * the starting entry cannot be located, -EINVAL if o is not an entry
 * object, -EBADMSG if the file claims to have no entries, or another
 * negative error. */
int journal_file_skip_entry(
                JournalFile *f,
                Object *o, uint64_t p,
                int64_t skip,
                Object **ret, uint64_t *offset) {

        uint64_t i, n;
        int r;

        assert(f);
        assert(o);
        assert(p > 0);

        if (o->object.type != OBJECT_ENTRY)
                return -EINVAL;

        r = generic_array_bisect(f,
                                 le64toh(f->header->entry_array_offset),
                                 le64toh(f->header->n_entries),
                                 p,
                                 test_object_offset,
                                 DIRECTION_DOWN,
                                 NULL, NULL,
                                 &i);
        if (r <= 0)
                return r;

        /* Calculate new index */
        if (skip < 0) {
                /* Negate in the unsigned domain, as -skip overflows
                 * for INT64_MIN */
                uint64_t back = 0 - (uint64_t) skip;

                if (back >= i)
                        i = 0;
                else
                        i = i - back;
        } else {
                uint64_t forward = (uint64_t) skip;

                /* Saturate instead of wrapping around, so that the
                 * clamping below still applies */
                if (forward > (uint64_t) -1 - i)
                        i = (uint64_t) -1;
                else
                        i += forward;
        }

        n = le64toh(f->header->n_entries);
        if (n <= 0)
                return -EBADMSG;

        if (i >= n)
                i = n-1;

        return generic_array_get(f,
                                 le64toh(f->header->entry_array_offset),
                                 i,
                                 ret, offset);
}
/* Like journal_file_next_entry(), but iterates only over the entries
 * that reference the data object at data_offset.
 *
 * Returns 1 and fills *ret / *offset (if non-NULL) on success, 0 if
 * there is no such entry, -EINVAL if o is not an entry object, or
 * another negative error. */
int journal_file_next_entry_for_data(
                JournalFile *f,
                Object *o, uint64_t p,
                uint64_t data_offset,
                direction_t direction,
                Object **ret, uint64_t *offset) {

        uint64_t total, pos;
        Object *d;
        int r;

        assert(f);
        assert(p > 0 || !o);

        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
        if (r < 0)
                return r;

        total = le64toh(d->data.n_entries);
        if (total == 0)
                return 0;

        if (o) {
                if (o->object.type != OBJECT_ENTRY)
                        return -EINVAL;

                /* Find the index of the current entry in the data
                 * object's list first */
                r = generic_array_bisect_plus_one(f,
                                                  le64toh(d->data.entry_offset),
                                                  le64toh(d->data.entry_array_offset),
                                                  le64toh(d->data.n_entries),
                                                  p,
                                                  test_object_offset,
                                                  DIRECTION_DOWN,
                                                  NULL, NULL,
                                                  &pos);
                if (r <= 0)
                        return r;

                if (direction == DIRECTION_DOWN) {
                        if (pos >= total - 1)
                                return 0;

                        pos++;
                } else {
                        if (pos == 0)
                                return 0;

                        pos--;
                }
        } else if (direction == DIRECTION_DOWN)
                pos = 0;
        else
                pos = total - 1;

        return generic_array_get_plus_one(f,
                                          le64toh(d->data.entry_offset),
                                          le64toh(d->data.entry_array_offset),
                                          pos,
                                          ret, offset);
}
/* Seeks by sequence number among the entries that reference the data
 * object at data_offset.
 *
 * Returns 1 if an entry was located, 0 if not, or a negative error. */
int journal_file_move_to_entry_by_seqnum_for_data(
                JournalFile *f,
                uint64_t data_offset,
                uint64_t seqnum,
                direction_t direction,
                Object **ret, uint64_t *offset) {

        Object *d;
        int r;

        /* Only a negative return is an error here, as for every other
         * caller of journal_file_move_to_object() in this file; bailing
         * out on 0 as well would skip the search. */
        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
        if (r < 0)
                return r;

        return generic_array_bisect_plus_one(f,
                                             le64toh(d->data.entry_offset),
                                             le64toh(d->data.entry_array_offset),
                                             le64toh(d->data.n_entries),
                                             seqnum,
                                             test_object_seqnum,
                                             direction,
                                             ret, offset, NULL);
}
/* Seeks by realtime timestamp among the entries that reference the
 * data object at data_offset.
 *
 * Returns 1 if an entry was located, 0 if not, or a negative error. */
int journal_file_move_to_entry_by_realtime_for_data(
                JournalFile *f,
                uint64_t data_offset,
                uint64_t realtime,
                direction_t direction,
                Object **ret, uint64_t *offset) {

        Object *d;
        int r;

        /* Only a negative return is an error here, as for every other
         * caller of journal_file_move_to_object() in this file; bailing
         * out on 0 as well would skip the search. */
        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
        if (r < 0)
                return r;

        return generic_array_bisect_plus_one(f,
                                             le64toh(d->data.entry_offset),
                                             le64toh(d->data.entry_array_offset),
                                             le64toh(d->data.n_entries),
                                             realtime,
                                             test_object_realtime,
                                             direction,
                                             ret, offset, NULL);
}
/* Prints a summary of the file header followed by one line per object
 * (its type, plus seqnum and timestamps for entries, plus a flag line
 * for compressed objects) to stdout. Logs "File corrupt" and stops if
 * an object cannot be accessed. */
void journal_file_dump(JournalFile *f) {
        char file_id[33], machine_id[33], boot_id[33];
        uint64_t where;
        Object *obj;

        assert(f);

        printf("File Path: %s\n"
               "File ID: %s\n"
               "Machine ID: %s\n"
               "Boot ID: %s\n"
               "Arena size: %llu\n"
               "Objects: %lu\n"
               "Entries: %lu\n",
               f->path,
               sd_id128_to_string(f->header->file_id, file_id),
               sd_id128_to_string(f->header->machine_id, machine_id),
               sd_id128_to_string(f->header->boot_id, boot_id),
               (unsigned long long) le64toh(f->header->arena_size),
               (unsigned long) le64toh(f->header->n_objects),
               (unsigned long) le64toh(f->header->n_entries));

        /* Walk all objects, from the start of the arena to the tail */
        where = le64toh(f->header->arena_offset);
        while (where != 0) {

                if (journal_file_move_to_object(f, -1, where, &obj) < 0) {
                        log_error("File corrupt");
                        return;
                }

                switch (obj->object.type) {

                case OBJECT_UNUSED:
                        printf("Type: OBJECT_UNUSED\n");
                        break;

                case OBJECT_DATA:
                        printf("Type: OBJECT_DATA\n");
                        break;

                case OBJECT_ENTRY:
                        printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
                               (unsigned long long) le64toh(obj->entry.seqnum),
                               (unsigned long long) le64toh(obj->entry.monotonic),
                               (unsigned long long) le64toh(obj->entry.realtime));
                        break;

                case OBJECT_FIELD_HASH_TABLE:
                        printf("Type: OBJECT_FIELD_HASH_TABLE\n");
                        break;

                case OBJECT_DATA_HASH_TABLE:
                        printf("Type: OBJECT_DATA_HASH_TABLE\n");
                        break;

                case OBJECT_ENTRY_ARRAY:
                        printf("Type: OBJECT_ENTRY_ARRAY\n");
                        break;
                }

                if (obj->object.flags & OBJECT_COMPRESSED)
                        printf("Flags: COMPRESSED\n");

                /* The tail object is the last one */
                if (where == le64toh(f->header->tail_object_offset))
                        break;

                where = where + ALIGN64(le64toh(obj->object.size));
        }
}
/* Opens (and, for an empty writable file, initializes) a journal file.
 *
 * fname:    path of the file
 * flags:    open(2) flags; the access mode must be O_RDONLY or O_RDWR
 * mode:     creation mode passed to open(2)
 * template: passed on to journal_file_init_header() when a new file is
 *           initialized
 * ret:      receives the new JournalFile object, if non-NULL
 *
 * Returns 0 on success. Returns -EINVAL for an unsupported access
 * mode, -ENOMEM on allocation failure, -EIO if the file is too small
 * to hold a header, or the negative error of the step that failed. On
 * failure the partially set up object is closed again. */
int journal_file_open(
                const char *fname,
                int flags,
                mode_t mode,
                JournalFile *template,
                JournalFile **ret) {

        JournalFile *jf;
        bool fresh = false;
        int r;

        assert(fname);

        switch (flags & O_ACCMODE) {

        case O_RDONLY:
        case O_RDWR:
                break;

        default:
                return -EINVAL;
        }

        jf = new0(JournalFile, 1);
        if (!jf)
                return -ENOMEM;

        jf->fd = -1;
        jf->flags = flags;
        jf->mode = mode;
        jf->writable = (flags & O_ACCMODE) != O_RDONLY;
        jf->prot = prot_from_flags(flags);

        jf->path = strdup(fname);
        if (!jf->path) {
                r = -ENOMEM;
                goto fail;
        }

        jf->fd = open(jf->path, jf->flags|O_CLOEXEC, jf->mode);
        if (jf->fd < 0) {
                r = -errno;
                goto fail;
        }

        if (fstat(jf->fd, &jf->last_stat) < 0) {
                r = -errno;
                goto fail;
        }

        if (jf->writable && jf->last_stat.st_size == 0) {
                /* An empty file we may write to: set it up from
                 * scratch and refresh our idea of its size */
                fresh = true;

                r = journal_file_init_header(jf, template);
                if (r < 0)
                        goto fail;

                if (fstat(jf->fd, &jf->last_stat) < 0) {
                        r = -errno;
                        goto fail;
                }
        }

        if (jf->last_stat.st_size < (off_t) sizeof(Header)) {
                r = -EIO;
                goto fail;
        }

        jf->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, jf->fd, 0);
        if (jf->header == MAP_FAILED) {
                jf->header = NULL;
                r = -errno;
                goto fail;
        }

        /* Headers we just wrote ourselves need no verification */
        if (!fresh) {
                r = journal_file_verify_header(jf);
                if (r < 0)
                        goto fail;
        }

        if (jf->writable) {
                r = journal_file_refresh_header(jf);
                if (r < 0)
                        goto fail;
        }

        if (fresh) {
                r = journal_file_setup_field_hash_table(jf);
                if (r < 0)
                        goto fail;

                r = journal_file_setup_data_hash_table(jf);
                if (r < 0)
                        goto fail;
        }

        r = journal_file_map_field_hash_table(jf);
        if (r < 0)
                goto fail;

        r = journal_file_map_data_hash_table(jf);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = jf;

        return 0;

fail:
        journal_file_close(jf);

        return r;
}
/* Rotates a journal file: the current file is renamed to
 * "<name>@<seqnum id>-<seqnum>-<realtime>.journal", marked as
 * archived and closed, and a new file is opened under the original
 * path using the old file as template.
 *
 * *f is replaced by the new file (NULL if opening it failed).
 *
 * Returns the result of opening the new file; -EINVAL if the file is
 * not writable or its name does not end in ".journal", -ENOMEM on
 * allocation failure, or the negative errno of a failed rename() (in
 * these early-exit cases *f is left untouched). */
int journal_file_rotate(JournalFile **f) {
        char *p;
        size_t l;
        JournalFile *old_file, *new_file = NULL;
        int r;

        assert(f);
        assert(*f);

        old_file = *f;

        if (!old_file->writable)
                return -EINVAL;

        if (!endswith(old_file->path, ".journal"))
                return -EINVAL;

        l = strlen(old_file->path);

        p = new(char, l + 1 + 16 + 1 + 32 + 1 + 16 + 1);
        if (!p)
                return -ENOMEM;

        /* Replace the ".journal" suffix (8 characters) by
         * "@<id>-<seqnum>-<realtime>.journal" */
        memcpy(p, old_file->path, l - 8);
        p[l-8] = '@';
        sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
        snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
                 "-%016llx-%016llx.journal",
                 (unsigned long long) le64toh((*f)->header->seqnum),
                 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));

        /* Pick up errno right away, free() is allowed to change it */
        r = rename(old_file->path, p);
        if (r < 0)
                r = -errno;

        free(p);

        if (r < 0)
                return r;

        old_file->header->state = le32toh(STATE_ARCHIVED);

        r = journal_file_open(old_file->path, old_file->flags, old_file->mode, old_file, &new_file);
        journal_file_close(old_file);

        *f = new_file;
        return r;
}
/* Describes one archived journal file considered for deletion by
 * journal_directory_vacuum(). */
struct vacuum_info {
/* Disk usage of the file, computed from its stat data */
off_t usage;
/* Copy of the file name within the directory; freed by the vacuum code */
char *filename;
/* Realtime timestamp parsed from the file name */
uint64_t realtime;
/* Sequence number ID parsed from the file name */
sd_id128_t seqnum_id;
/* Sequence number parsed from the file name */
uint64_t seqnum;
};
/* qsort() comparison function for struct vacuum_info.
 *
 * Files sharing a sequence number ID are ordered by sequence number.
 * Files with different IDs are ordered by realtime timestamp, with the
 * raw ID bytes as tie breaker. */
static int vacuum_compare(const void *_a, const void *_b) {
        const struct vacuum_info *x = _a, *y = _b;

        if (sd_id128_equal(x->seqnum_id, y->seqnum_id)) {
                if (x->seqnum < y->seqnum)
                        return -1;
                if (x->seqnum > y->seqnum)
                        return 1;

                return 0;
        }

        if (x->realtime < y->realtime)
                return -1;
        if (x->realtime > y->realtime)
                return 1;

        return memcmp(&x->seqnum_id, &y->seqnum_id, 16);
}
/* Deletes archived journal files from 'directory', oldest first, until
 * the archived files use at most max_use bytes and the file system has
 * at least min_free bytes available.
 *
 * Only regular files named "<name>@<32 hex id>-<16 hex seqnum>-<16 hex
 * realtime>.journal" are considered. Does nothing if max_use is 0.
 *
 * Returns 0 on success or a negative error. Failing to delete an
 * individual file is only logged. */
int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
        DIR *d;
        int r = 0;
        struct vacuum_info *list = NULL;
        unsigned n_list = 0, n_allocated = 0, i;
        uint64_t sum = 0;

        assert(directory);

        if (max_use <= 0)
                return 0;

        d = opendir(directory);
        if (!d)
                return -errno;

        for (;;) {
                int k;
                struct dirent buf, *de;
                size_t q;
                struct stat st;
                char *p;
                unsigned long long seqnum, realtime;
                sd_id128_t seqnum_id;

                k = readdir_r(d, &buf, &de);
                if (k != 0) {
                        r = -k;
                        goto finish;
                }

                if (!de)
                        break;

                if (!dirent_is_file_with_suffix(de, ".journal"))
                        continue;

                /* Check that the name is long enough and has the
                 * separators of an archived file in the right places,
                 * counted from the end of the name */
                q = strlen(de->d_name);

                if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
                        continue;

                if (de->d_name[q-8-16-1] != '-' ||
                    de->d_name[q-8-16-1-16-1] != '-' ||
                    de->d_name[q-8-16-1-16-1-32-1] != '@')
                        continue;

                if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
                        continue;

                if (!S_ISREG(st.st_mode))
                        continue;

                p = strdup(de->d_name);
                if (!p) {
                        r = -ENOMEM;
                        goto finish;
                }

                /* Terminate the ID part so that it can be parsed */
                de->d_name[q-8-16-1-16-1] = 0;
                if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
                        free(p);
                        continue;
                }

                if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
                        free(p);
                        continue;
                }

                if (n_list >= n_allocated) {
                        struct vacuum_info *j;

                        n_allocated = MAX(n_allocated * 2U, 8U);
                        j = realloc(list, n_allocated * sizeof(struct vacuum_info));
                        if (!j) {
                                free(p);
                                r = -ENOMEM;
                                goto finish;
                        }

                        list = j;
                }

                list[n_list].filename = p;

                /* st_blocks counts 512 byte units, independently of
                 * st_blksize, which is merely the preferred I/O size */
                list[n_list].usage = (uint64_t) 512 * (uint64_t) st.st_blocks;
                list[n_list].seqnum = seqnum;
                list[n_list].realtime = realtime;
                list[n_list].seqnum_id = seqnum_id;

                sum += list[n_list].usage;

                n_list ++;
        }

        qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);

        for(i = 0; i < n_list; i++) {
                struct statvfs ss;

                if (fstatvfs(dirfd(d), &ss) < 0) {
                        r = -errno;
                        goto finish;
                }

                /* Stop as soon as both limits are met */
                if (sum <= max_use &&
                    (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
                        break;

                if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
                        log_debug("Deleted archived journal %s/%s.", directory, list[i].filename);
                        sum -= list[i].usage;
                } else if (errno != ENOENT)
                        log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
        }

finish:
        for (i = 0; i < n_list; i++)
                free(list[i].filename);

        free(list);

        if (d)
                closedir(d);

        return r;
}
/* Copies the entry o (located at offset p in 'from') into 'to'.
 *
 * Every data object referenced by the entry is read from the source
 * (uncompressing it if necessary), stored in the destination, and a
 * new entry with the original timestamps is appended there.
 *
 * Returns 0 on success; -EPERM if 'to' is not writable, -EINVAL if the
 * entry is older than the last entry of 'to', -EBADMSG if the source
 * is inconsistent, -E2BIG if a payload does not fit into size_t,
 * -EPROTONOSUPPORT for compressed data in builds without XZ, or
 * another negative error. */
int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
        uint64_t k, n_items, xor_hash = 0;
        EntryItem *items;
        dual_timestamp ts;
        int r;

        assert(from);
        assert(to);
        assert(o);
        assert(p);

        if (!to->writable)
                return -EPERM;

        ts.monotonic = le64toh(o->entry.monotonic);
        ts.realtime = le64toh(o->entry.realtime);

        if (to->tail_entry_monotonic_valid &&
            ts.monotonic < le64toh(to->header->tail_entry_monotonic))
                return -EINVAL;

        if (ts.realtime < le64toh(to->header->tail_entry_realtime))
                return -EINVAL;

        n_items = journal_file_entry_n_items(o);
        items = alloca(sizeof(EntryItem) * n_items);

        for (k = 0; k < n_items; k++) {
                uint64_t src_offset, expected_hash, payload_size, dst_offset;
                size_t narrowed;
                void *payload;
                Object *copy;

                src_offset = le64toh(o->entry.items[k].object_offset);
                expected_hash = o->entry.items[k].hash;

                /* From here on o refers to the data object */
                r = journal_file_move_to_object(from, OBJECT_DATA, src_offset, &o);
                if (r < 0)
                        return r;

                if (expected_hash != o->data.hash)
                        return -EBADMSG;

                payload_size = le64toh(o->object.size) - offsetof(Object, data.payload);
                narrowed = (size_t) payload_size;

                /* We hit the limit on 32bit machines */
                if ((uint64_t) narrowed != payload_size)
                        return -E2BIG;

                if (o->object.flags & OBJECT_COMPRESSED) {
#ifdef HAVE_XZ
                        uint64_t rsize;

                        if (!uncompress_blob(o->data.payload, payload_size, &from->compress_buffer, &from->compress_buffer_size, &rsize))
                                return -EBADMSG;

                        payload = from->compress_buffer;
                        payload_size = rsize;
#else
                        return -EPROTONOSUPPORT;
#endif
                } else
                        payload = o->data.payload;

                r = journal_file_append_data(to, payload, payload_size, &copy, &dst_offset);
                if (r < 0)
                        return r;

                xor_hash ^= le64toh(copy->data.hash);
                items[k].object_offset = htole64(dst_offset);
                items[k].hash = copy->data.hash;

                /* Go back to the entry object for the next item */
                r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
                if (r < 0)
                        return r;
        }

        return journal_file_append_entry_internal(to, &ts, xor_hash, items, n_items, seqnum, ret, offset);
}
/* Fills in and sanitizes the size limits in *m.
 *
 * Fields set to (uint64_t) -1 get defaults derived from the size of
 * the file system that fd refers to (or fixed fallbacks if that size
 * cannot be determined); explicitly configured values are page-aligned
 * and raised to the supported minimum where necessary. The resulting
 * values are logged at debug level. */
void journal_default_metrics(JournalMetrics *m, int fd) {
        uint64_t fs_size = 0;
        struct statvfs ss;
        char a[64], b[64], c[64], d[64];

        assert(m);
        assert(fd >= 0);

        /* Widen before multiplying, so that the product cannot
         * overflow where the statvfs fields are only 32 bit wide */
        if (fstatvfs(fd, &ss) >= 0)
                fs_size = (uint64_t) ss.f_frsize * (uint64_t) ss.f_blocks;

        if (m->max_use == (uint64_t) -1) {

                if (fs_size > 0) {
                        m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */

                        if (m->max_use > DEFAULT_MAX_USE_UPPER)
                                m->max_use = DEFAULT_MAX_USE_UPPER;

                        if (m->max_use < DEFAULT_MAX_USE_LOWER)
                                m->max_use = DEFAULT_MAX_USE_LOWER;
                } else
                        m->max_use = DEFAULT_MAX_USE_LOWER;
        } else {
                m->max_use = PAGE_ALIGN(m->max_use);

                if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
                        m->max_use = JOURNAL_FILE_SIZE_MIN*2;
        }

        if (m->max_size == (uint64_t) -1) {
                m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */

                if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
                        m->max_size = DEFAULT_MAX_SIZE_UPPER;
        } else
                m->max_size = PAGE_ALIGN(m->max_size);

        if (m->max_size < JOURNAL_FILE_SIZE_MIN)
                m->max_size = JOURNAL_FILE_SIZE_MIN;

        /* Make sure there is room for at least two files */
        if (m->max_size*2 > m->max_use)
                m->max_use = m->max_size*2;

        if (m->min_size == (uint64_t) -1)
                m->min_size = JOURNAL_FILE_SIZE_MIN;
        else {
                m->min_size = PAGE_ALIGN(m->min_size);

                if (m->min_size < JOURNAL_FILE_SIZE_MIN)
                        m->min_size = JOURNAL_FILE_SIZE_MIN;

                if (m->min_size > m->max_size)
                        m->max_size = m->min_size;
        }

        if (m->keep_free == (uint64_t) -1) {

                if (fs_size > 0) {
                        m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */

                        if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
                                m->keep_free = DEFAULT_KEEP_FREE_UPPER;
                } else
                        m->keep_free = DEFAULT_KEEP_FREE;
        }

        log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
                  format_bytes(a, sizeof(a), m->max_use),
                  format_bytes(b, sizeof(b), m->max_size),
                  format_bytes(c, sizeof(c), m->min_size),
                  format_bytes(d, sizeof(d), m->keep_free));
}