journal-file.c revision 48b617399d7d8446c5310b2568b2af6f13331b4c
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2011 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
43b3a5ef61859f06cdbaf26765cab8e1adac4296Tom Gundersen#include <sys/mman.h>
7eb08da4b388b920c8a894b1500c9cc7dc1f31efTom Gundersen#include <errno.h>
43b3a5ef61859f06cdbaf26765cab8e1adac4296Tom Gundersen#include <sys/uio.h>
07630cea1f3a845c09309f197ac7c4f11edd3b62Lennart Poettering#include <unistd.h>
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen#include <sys/statvfs.h>
b5efdb8af40ea759a1ea584c1bc44ecc81dd00ceLennart Poettering#include <fcntl.h>
07630cea1f3a845c09309f197ac7c4f11edd3b62Lennart Poettering#include <stddef.h>
07630cea1f3a845c09309f197ac7c4f11edd3b62Lennart Poettering
a501033335ed402c8f7e86fe41a15531ba69abd7Tom Gundersen#include "journal-def.h"
3ffd4af22052963e7a29431721ee204e634bea75Lennart Poettering#include "journal-file.h"
43b3a5ef61859f06cdbaf26765cab8e1adac4296Tom Gundersen#include "journal-authenticate.h"
07630cea1f3a845c09309f197ac7c4f11edd3b62Lennart Poettering#include "lookup3.h"
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen#include "compress.h"
07630cea1f3a845c09309f197ac7c4f11edd3b62Lennart Poettering#include "fsprg.h"
1c4baffc1895809bae9ac36b670af90a4cb9cd7dTom Gundersen
/* Default byte sizes of the data and field hash tables, expressed as an
 * item count multiplied by the size of one HashItem */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL * sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL * sizeof(HashItem))

/* Size threshold consulted by the compression logic */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* Smallest size a journal file may have */
#define JOURNAL_FILE_SIZE_MIN (64ULL * 1024ULL)                         /* 64 KiB */

/* Lower and upper bounds applied when the max_use value is deduced
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL * 1024ULL * 1024ULL)                /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)      /* 4 GiB */

/* Upper bound applied when max_size is deduced from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL * 1024ULL * 1024ULL)             /* 128 MiB */

/* Upper bound applied when the keep_free value is deduced from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)    /* 4 GiB */

/* keep_free value used when the file system size cannot be
 * determined */
#define DEFAULT_KEEP_FREE (1024ULL * 1024ULL)                           /* 1 MiB */

/* n_data was the first field added after the initial file format
 * design, hence its offset marks the smallest valid header */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
2ad8416dd057e7e3185169609ca3006e7649f576Zbigniew Jędrzejewski-Szmek
2ad8416dd057e7e3185169609ca3006e7649f576Zbigniew Jędrzejewski-Szmekvoid journal_file_close(JournalFile *f) {
2ad8416dd057e7e3185169609ca3006e7649f576Zbigniew Jędrzejewski-Szmek assert(f);
2ad8416dd057e7e3185169609ca3006e7649f576Zbigniew Jędrzejewski-Szmek
2ad8416dd057e7e3185169609ca3006e7649f576Zbigniew Jędrzejewski-Szmek#ifdef HAVE_GCRYPT
2ad8416dd057e7e3185169609ca3006e7649f576Zbigniew Jędrzejewski-Szmek /* Write the final tag */
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen if (f->seal && f->writable)
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen journal_file_append_tag(f);
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen#endif
5b9d4dc05560ddda89e48b6b39365824b15e1300Tom Gundersen
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen /* Sync everything to disk, before we mark the file offline */
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen if (f->mmap && f->fd >= 0)
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen mmap_cache_close_fd(f->mmap, f->fd);
43d60b77a83b3185e37c65c4f2649d24c227c7f0Tom Gundersen
43d60b77a83b3185e37c65c4f2649d24c227c7f0Tom Gundersen if (f->writable && f->fd >= 0)
43d60b77a83b3185e37c65c4f2649d24c227c7f0Tom Gundersen fdatasync(f->fd);
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen if (f->header) {
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen /* Mark the file offline. Don't override the archived state if it already is set */
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen if (f->writable && f->header->state == STATE_ONLINE)
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen f->header->state = STATE_OFFLINE;
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen munmap(f->header, PAGE_ALIGN(sizeof(Header)));
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen }
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen if (f->fd >= 0)
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen close_nointr_nofail(f->fd);
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen free(f->path);
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen if (f->mmap)
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen mmap_cache_unref(f->mmap);
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen#ifdef HAVE_XZ
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen free(f->compress_buffer);
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen#endif
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen#ifdef HAVE_GCRYPT
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen if (f->fss_file)
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
9a4b012e43f23516373bf398dd9a458439d19939Tom Gundersen else if (f->fsprg_state)
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen free(f->fsprg_state);
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen free(f->fsprg_seed);
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen if (f->hmac)
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen gcry_md_close(f->hmac);
03e334a1c7dc8c20c38902aa039440763acc9b17Lennart Poettering#endif
43b3a5ef61859f06cdbaf26765cab8e1adac4296Tom Gundersen
1c4baffc1895809bae9ac36b670af90a4cb9cd7dTom Gundersen free(f);
43b3a5ef61859f06cdbaf26765cab8e1adac4296Tom Gundersen}
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
/* Builds a fresh file header in memory and writes it to offset 0 of
 * f->fd.
 *
 * The header gets the journal signature, its own aligned size, the
 * compression/sealing flags matching f->compress and f->seal, and a
 * random file ID. When a template is given, its seqnum_id and
 * tail_entry_seqnum are carried over; otherwise seqnum_id is set to the
 * new file ID.
 *
 * Returns 0 on success, a negative errno-style value on failure, and
 * -EIO if the header was only partially written. */
static int journal_file_init_header(JournalFile *f, JournalFile *template) {
        Header header;
        ssize_t written;
        int r;

        assert(f);

        zero(header);
        memcpy(header.signature, HEADER_SIGNATURE, 8);
        header.header_size = htole64(ALIGN64(sizeof(Header)));

        /* The structure is zeroed, so the flag words only need to be
         * touched when a flag is actually set */
        if (f->compress)
                header.incompatible_flags = htole32(HEADER_INCOMPATIBLE_COMPRESSED);

        if (f->seal)
                header.compatible_flags = htole32(HEADER_COMPATIBLE_SEALED);

        r = sd_id128_randomize(&header.file_id);
        if (r < 0)
                return r;

        if (!template)
                header.seqnum_id = header.file_id;
        else {
                header.seqnum_id = template->header->seqnum_id;
                header.tail_entry_seqnum = template->header->tail_entry_seqnum;
        }

        written = pwrite(f->fd, &header, sizeof(header), 0);
        if (written < 0)
                return -errno;

        /* A short write leaves a truncated header behind */
        if ((size_t) written != sizeof(header))
                return -EIO;

        return 0;
}
187dc6e554f2d5b4b5a3bee72c73ff5df6418aa6Thomas Hindoe Paaboel Andersen
187dc6e554f2d5b4b5a3bee72c73ff5df6418aa6Thomas Hindoe Paaboel Andersenstatic int journal_file_refresh_header(JournalFile *f) {
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen int r;
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen sd_id128_t boot_id;
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen assert(f);
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
ecb08ec6a5c52f2d940f3b8147e2a480affd46e1Zbigniew Jędrzejewski-Szmek r = sd_id128_get_machine(&f->header->machine_id);
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen if (r < 0)
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen return r;
ed88bcfb7c15029f9fc95ee2380759a9eb782d46Zbigniew Jędrzejewski-Szmek
ed88bcfb7c15029f9fc95ee2380759a9eb782d46Zbigniew Jędrzejewski-Szmek r = sd_id128_get_boot(&boot_id);
ed88bcfb7c15029f9fc95ee2380759a9eb782d46Zbigniew Jędrzejewski-Szmek if (r < 0)
ed88bcfb7c15029f9fc95ee2380759a9eb782d46Zbigniew Jędrzejewski-Szmek return r;
ed88bcfb7c15029f9fc95ee2380759a9eb782d46Zbigniew Jędrzejewski-Szmek
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen if (sd_id128_equal(boot_id, f->header->boot_id))
ecb08ec6a5c52f2d940f3b8147e2a480affd46e1Zbigniew Jędrzejewski-Szmek f->tail_entry_monotonic_valid = true;
ecb08ec6a5c52f2d940f3b8147e2a480affd46e1Zbigniew Jędrzejewski-Szmek
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen f->header->boot_id = boot_id;
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen f->header->state = STATE_ONLINE;
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen /* Sync the online state to disk */
e9f3d2d508bfd9fb5b54e82994bda365a71eb864Zbigniew Jędrzejewski-Szmek msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
e9f3d2d508bfd9fb5b54e82994bda365a71eb864Zbigniew Jędrzejewski-Szmek fdatasync(f->fd);
e9f3d2d508bfd9fb5b54e82994bda365a71eb864Zbigniew Jędrzejewski-Szmek
36f822c4bd077f9121757e24b6516e5c7ada63b5Zbigniew Jędrzejewski-Szmek return 0;
36f822c4bd077f9121757e24b6516e5c7ada63b5Zbigniew Jędrzejewski-Szmek}
ecb08ec6a5c52f2d940f3b8147e2a480affd46e1Zbigniew Jędrzejewski-Szmek
36f822c4bd077f9121757e24b6516e5c7ada63b5Zbigniew Jędrzejewski-Szmekstatic int journal_file_verify_header(JournalFile *f) {
98a375f6d5cac24eb80d6d4e00699851324afdecTom Gundersen assert(f);
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
dab495dc23bf9a5ba0487a057bb594355555a0e9Tom Gundersen if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
dab495dc23bf9a5ba0487a057bb594355555a0e9Tom Gundersen return -EBADMSG;
dab495dc23bf9a5ba0487a057bb594355555a0e9Tom Gundersen
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen /* In both read and write mode we refuse to open files with
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen * incompatible flags we don't know */
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen#ifdef HAVE_XZ
ecb08ec6a5c52f2d940f3b8147e2a480affd46e1Zbigniew Jędrzejewski-Szmek if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen return -EPROTONOSUPPORT;
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen#else
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen if (f->header->incompatible_flags != 0)
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen return -EPROTONOSUPPORT;
f61942250a43a123580d7bbe5d7873dc5118ed97Tom Gundersen#endif
f8a0bb5285024b6ce372c3157e761e6543ebdcd2Andreas Henriksson
a2a5291b3f5ab6ed4c92f51d0fd10a03047380d8Zbigniew Jędrzejewski-Szmek /* When open for writing we refuse to open files with
f61942250a43a123580d7bbe5d7873dc5118ed97Tom Gundersen * compatible flags, too */
f61942250a43a123580d7bbe5d7873dc5118ed97Tom Gundersen if (f->writable) {
f61942250a43a123580d7bbe5d7873dc5118ed97Tom Gundersen#ifdef HAVE_GCRYPT
74df0fca09b3c31ed19e14ba80f996fdff772417Lennart Poettering if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
b5884878a2874447b2a9f07f324a7cd909d96d48Lennart Poettering return -EPROTONOSUPPORT;
da927ba997d68401563b927f92e6e40e021a8e5cMichal Schmidt#else
74df0fca09b3c31ed19e14ba80f996fdff772417Lennart Poettering if (f->header->compatible_flags != 0)
b5884878a2874447b2a9f07f324a7cd909d96d48Lennart Poettering return -EPROTONOSUPPORT;
f61942250a43a123580d7bbe5d7873dc5118ed97Tom Gundersen#endif
a2a5291b3f5ab6ed4c92f51d0fd10a03047380d8Zbigniew Jędrzejewski-Szmek }
a2a5291b3f5ab6ed4c92f51d0fd10a03047380d8Zbigniew Jędrzejewski-Szmek
f61942250a43a123580d7bbe5d7873dc5118ed97Tom Gundersen if (f->header->state >= _STATE_MAX)
f61942250a43a123580d7bbe5d7873dc5118ed97Tom Gundersen return -EBADMSG;
f61942250a43a123580d7bbe5d7873dc5118ed97Tom Gundersen
f61942250a43a123580d7bbe5d7873dc5118ed97Tom Gundersen /* The first addition was n_data, so check that we are at least this large */
f61942250a43a123580d7bbe5d7873dc5118ed97Tom Gundersen if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen return -EBADMSG;
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
edf029b7fd9a5853a87d3ca99aac2922bb8a277eTom Gundersen if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
edf029b7fd9a5853a87d3ca99aac2922bb8a277eTom Gundersen return -EBADMSG;
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen return -ENODATA;
f61942250a43a123580d7bbe5d7873dc5118ed97Tom Gundersen
f61942250a43a123580d7bbe5d7873dc5118ed97Tom Gundersen if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
3f85ef0f05ffc51e19f86fb83a1c51e8e3cd6817Harald Hoyer return -ENODATA;
f61942250a43a123580d7bbe5d7873dc5118ed97Tom Gundersen
f61942250a43a123580d7bbe5d7873dc5118ed97Tom Gundersen if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
97f2d76d4f4dfab8b0629c09926a05a1e5621125Tom Gundersen !VALID64(le64toh(f->header->field_hash_table_offset)) ||
2ad8416dd057e7e3185169609ca3006e7649f576Zbigniew Jędrzejewski-Szmek !VALID64(le64toh(f->header->tail_object_offset)) ||
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen !VALID64(le64toh(f->header->entry_array_offset)))
2ad8416dd057e7e3185169609ca3006e7649f576Zbigniew Jędrzejewski-Szmek return -ENODATA;
f647962d64e844689f3e2acfce6102fc47e76df2Michal Schmidt
f647962d64e844689f3e2acfce6102fc47e76df2Michal Schmidt if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen return -ENODATA;
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen if (f->writable) {
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen uint8_t state;
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen sd_id128_t machine_id;
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen int r;
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen r = sd_id128_get_machine(&machine_id);
2ad8416dd057e7e3185169609ca3006e7649f576Zbigniew Jędrzejewski-Szmek if (r < 0)
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen return r;
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
464cf22f17e0cf2d8bfa6d72b5e7a662d634f149Tom Gundersen if (!sd_id128_equal(machine_id, f->header->machine_id))
464cf22f17e0cf2d8bfa6d72b5e7a662d634f149Tom Gundersen return -EHOSTDOWN;
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen state = f->header->state;
3b64e4d4f40baac56148c7d333d6a0053358ec7aTom Gundersen
3b64e4d4f40baac56148c7d333d6a0053358ec7aTom Gundersen if (state == STATE_ONLINE) {
3b64e4d4f40baac56148c7d333d6a0053358ec7aTom Gundersen log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
3b64e4d4f40baac56148c7d333d6a0053358ec7aTom Gundersen return -EBUSY;
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen } else if (state == STATE_ARCHIVED)
7eb08da4b388b920c8a894b1500c9cc7dc1f31efTom Gundersen return -ESHUTDOWN;
7eb08da4b388b920c8a894b1500c9cc7dc1f31efTom Gundersen else if (state != STATE_OFFLINE) {
7eb08da4b388b920c8a894b1500c9cc7dc1f31efTom Gundersen log_debug("Journal file %s has unknown state %u.", f->path, state);
b3e013148603aa670bc2c060ac63d48e54d76fc2Tom Gundersen return -EBUSY;
edbb03e95a3c31bf719d5c6c46eec14d0bcb9c8fTom Gundersen }
7eb08da4b388b920c8a894b1500c9cc7dc1f31efTom Gundersen }
edbb03e95a3c31bf719d5c6c46eec14d0bcb9c8fTom Gundersen
eb7040ec50fbfe5aad9eaf305bd442a4a235abaaTom Gundersen f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
b3e013148603aa670bc2c060ac63d48e54d76fc2Tom Gundersen
9b1c2626cef16722603bded9bb52033aba34dd74Tom Gundersen if (f->writable)
bf175aafd20c9ef974709ef12c5acf836121af33Tom Gundersen f->seal = JOURNAL_HEADER_SEALED(f->header);
b3e013148603aa670bc2c060ac63d48e54d76fc2Tom Gundersen
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen return 0;
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen}
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersenstatic int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen uint64_t old_size, new_size;
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen int r;
dc75168823540076b354135f6e2de7a9a978fbcaZbigniew Jędrzejewski-Szmek
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen assert(f);
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen
ca6038b89645c0c1bd547d6a420bf95eb3d6f4ccTom Gundersen /* We assume that this file is not sparse, and we know that
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen * for sure, since we always call posix_fallocate()
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen * ourselves */
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen old_size =
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen le64toh(f->header->header_size) +
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen le64toh(f->header->arena_size);
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen new_size = PAGE_ALIGN(offset + size);
ca6038b89645c0c1bd547d6a420bf95eb3d6f4ccTom Gundersen if (new_size < le64toh(f->header->header_size))
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen new_size = le64toh(f->header->header_size);
ca6038b89645c0c1bd547d6a420bf95eb3d6f4ccTom Gundersen
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen if (new_size <= old_size)
ca6038b89645c0c1bd547d6a420bf95eb3d6f4ccTom Gundersen return 0;
ca6038b89645c0c1bd547d6a420bf95eb3d6f4ccTom Gundersen
32bc8adcd836baff68e4d0f53b9a382f358cccf8Tom Gundersen if (f->metrics.max_size > 0 &&
ca6038b89645c0c1bd547d6a420bf95eb3d6f4ccTom Gundersen new_size > f->metrics.max_size)
ca6038b89645c0c1bd547d6a420bf95eb3d6f4ccTom Gundersen return -E2BIG;
ca6038b89645c0c1bd547d6a420bf95eb3d6f4ccTom Gundersen
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen if (new_size > f->metrics.min_size &&
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen f->metrics.keep_free > 0) {
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen struct statvfs svfs;
be32eb9b7fbcb22e4b648086d644135e38279633Tom Gundersen
be32eb9b7fbcb22e4b648086d644135e38279633Tom Gundersen if (fstatvfs(f->fd, &svfs) >= 0) {
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen uint64_t available;
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen available = svfs.f_bfree * svfs.f_bsize;
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen if (available >= f->metrics.keep_free)
f1ac700248f231b7bdac2aafe8c35650efddb89fTom Gundersen available -= f->metrics.keep_free;
f1ac700248f231b7bdac2aafe8c35650efddb89fTom Gundersen else
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen available = 0;
3c9b886068d99e5d3cbabcac32a4decf37244c54Tom Gundersen
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen if (new_size - old_size > available)
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen return -E2BIG;
3c9b886068d99e5d3cbabcac32a4decf37244c54Tom Gundersen }
3c9b886068d99e5d3cbabcac32a4decf37244c54Tom Gundersen }
f1ac700248f231b7bdac2aafe8c35650efddb89fTom Gundersen
f1ac700248f231b7bdac2aafe8c35650efddb89fTom Gundersen /* Note that the glibc fallocate() fallback is very
f1ac700248f231b7bdac2aafe8c35650efddb89fTom Gundersen inefficient, hence we try to minimize the allocation area
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen as we can. */
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen r = posix_fallocate(f->fd, old_size, new_size - old_size);
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen if (r != 0)
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen return -r;
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen if (fstat(f->fd, &f->last_stat) < 0)
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen return -errno;
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
3c9b886068d99e5d3cbabcac32a4decf37244c54Tom Gundersen
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen return 0;
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen}
3c9b886068d99e5d3cbabcac32a4decf37244c54Tom Gundersen
3c9b886068d99e5d3cbabcac32a4decf37244c54Tom Gundersenstatic int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen assert(f);
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen assert(ret);
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen if (size <= 0)
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen return -EINVAL;
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen /* Avoid SIGBUS on invalid accesses */
3c9b886068d99e5d3cbabcac32a4decf37244c54Tom Gundersen if (offset + size > (uint64_t) f->last_stat.st_size) {
3c9b886068d99e5d3cbabcac32a4decf37244c54Tom Gundersen /* Hmm, out of range? Let's refresh the fstat() data
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen * first, before we trust that check. */
3c9b886068d99e5d3cbabcac32a4decf37244c54Tom Gundersen
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen if (fstat(f->fd, &f->last_stat) < 0 ||
3c9b886068d99e5d3cbabcac32a4decf37244c54Tom Gundersen offset + size > (uint64_t) f->last_stat.st_size)
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen return -EADDRNOTAVAIL;
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen }
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen
3c9b886068d99e5d3cbabcac32a4decf37244c54Tom Gundersen return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
3c9b886068d99e5d3cbabcac32a4decf37244c54Tom Gundersen}
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersenstatic uint64_t minimum_header_size(Object *o) {
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen
464cf22f17e0cf2d8bfa6d72b5e7a662d634f149Tom Gundersen static uint64_t table[] = {
464cf22f17e0cf2d8bfa6d72b5e7a662d634f149Tom Gundersen [OBJECT_DATA] = sizeof(DataObject),
9bf3b53533cdc9b95c921b71da755401f223f765Lennart Poettering [OBJECT_FIELD] = sizeof(FieldObject),
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen [OBJECT_ENTRY] = sizeof(EntryObject),
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
9bf3b53533cdc9b95c921b71da755401f223f765Lennart Poettering [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
9bf3b53533cdc9b95c921b71da755401f223f765Lennart Poettering [OBJECT_TAG] = sizeof(TagObject),
9bf3b53533cdc9b95c921b71da755401f223f765Lennart Poettering };
b5db00e52ee2e20578839e4e4488f7b9af9abc38Umut Tezduyar Lindskog
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
55428d84f31b52da1c50b7469f14e15740547f20Tom Gundersen return sizeof(ObjectHeader);
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen
9bf3b53533cdc9b95c921b71da755401f223f765Lennart Poettering return table[o->object.type];
9bf3b53533cdc9b95c921b71da755401f223f765Lennart Poettering}
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersenint journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen int r;
3c9b886068d99e5d3cbabcac32a4decf37244c54Tom Gundersen void *t;
3c9b886068d99e5d3cbabcac32a4decf37244c54Tom Gundersen Object *o;
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen uint64_t s;
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen unsigned context;
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen assert(f);
464cf22f17e0cf2d8bfa6d72b5e7a662d634f149Tom Gundersen assert(ret);
464cf22f17e0cf2d8bfa6d72b5e7a662d634f149Tom Gundersen
3e137a1b9a0eac2bf43d493d3302c3c959b6ccdbTom Gundersen /* Objects may only be located at multiple of 64 bit */
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen if (!VALID64(offset))
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen return -EFAULT;
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen /* One context for each type, plus one catch-all for the rest */
43b3a5ef61859f06cdbaf26765cab8e1adac4296Tom Gundersen context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen
3e137a1b9a0eac2bf43d493d3302c3c959b6ccdbTom Gundersen r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
3e137a1b9a0eac2bf43d493d3302c3c959b6ccdbTom Gundersen if (r < 0)
3e137a1b9a0eac2bf43d493d3302c3c959b6ccdbTom Gundersen return r;
3e137a1b9a0eac2bf43d493d3302c3c959b6ccdbTom Gundersen
3e137a1b9a0eac2bf43d493d3302c3c959b6ccdbTom Gundersen o = (Object*) t;
3e137a1b9a0eac2bf43d493d3302c3c959b6ccdbTom Gundersen s = le64toh(o->object.size);
3e137a1b9a0eac2bf43d493d3302c3c959b6ccdbTom Gundersen
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen if (s < sizeof(ObjectHeader))
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen return -EBADMSG;
dab495dc23bf9a5ba0487a057bb594355555a0e9Tom Gundersen
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen if (o->object.type <= OBJECT_UNUSED)
dab495dc23bf9a5ba0487a057bb594355555a0e9Tom Gundersen return -EBADMSG;
755bde375f4db393ad06e73340bfcf4d0cf91bb2Lennart Poettering
755bde375f4db393ad06e73340bfcf4d0cf91bb2Lennart Poettering if (s < minimum_header_size(o))
a501033335ed402c8f7e86fe41a15531ba69abd7Tom Gundersen return -EBADMSG;
aedca89268ed4fd6be41e55a605f011033ad1fb5Tom Gundersen
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen if (type >= 0 && o->object.type != type)
755bde375f4db393ad06e73340bfcf4d0cf91bb2Lennart Poettering return -EBADMSG;
755bde375f4db393ad06e73340bfcf4d0cf91bb2Lennart Poettering
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen if (s > sizeof(ObjectHeader)) {
43b3a5ef61859f06cdbaf26765cab8e1adac4296Tom Gundersen r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
43b3a5ef61859f06cdbaf26765cab8e1adac4296Tom Gundersen if (r < 0)
43b3a5ef61859f06cdbaf26765cab8e1adac4296Tom Gundersen return r;
43b3a5ef61859f06cdbaf26765cab8e1adac4296Tom Gundersen
43b3a5ef61859f06cdbaf26765cab8e1adac4296Tom Gundersen o = (Object*) t;
43b3a5ef61859f06cdbaf26765cab8e1adac4296Tom Gundersen }
f61942250a43a123580d7bbe5d7873dc5118ed97Tom Gundersen
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen *ret = o;
daeb71a36a98834664e4d95773a3629b746f4db8Tom Gundersen return 0;
68ba38770640413b4fa06773447666eb88a38d4cTom Gundersen}
68ba38770640413b4fa06773447666eb88a38d4cTom Gundersen
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersenstatic uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen uint64_t r;
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen assert(f);
e51660ae56bb747ece2cab8fe6eec37f4d06a438Tom Gundersen
e51660ae56bb747ece2cab8fe6eec37f4d06a438Tom Gundersen r = le64toh(f->header->tail_entry_seqnum) + 1;
e51660ae56bb747ece2cab8fe6eec37f4d06a438Tom Gundersen
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen if (seqnum) {
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen /* If an external seqnum counter was passed, we update
daeb71a36a98834664e4d95773a3629b746f4db8Tom Gundersen * both the local and the external one, and set it to
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen * the maximum of both */
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen
daeb71a36a98834664e4d95773a3629b746f4db8Tom Gundersen if (*seqnum + 1 > r)
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen r = *seqnum + 1;
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen
daeb71a36a98834664e4d95773a3629b746f4db8Tom Gundersen *seqnum = r;
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen }
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen
daeb71a36a98834664e4d95773a3629b746f4db8Tom Gundersen f->header->tail_entry_seqnum = htole64(r);
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen if (f->header->head_entry_seqnum == 0)
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen f->header->head_entry_seqnum = htole64(r);
daeb71a36a98834664e4d95773a3629b746f4db8Tom Gundersen
daeb71a36a98834664e4d95773a3629b746f4db8Tom Gundersen return r;
daeb71a36a98834664e4d95773a3629b746f4db8Tom Gundersen}
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen
3c9b886068d99e5d3cbabcac32a4decf37244c54Tom Gundersenint journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen int r;
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen uint64_t p;
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen Object *tail, *o;
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen void *t;
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen assert(f);
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen assert(type > 0 && type < _OBJECT_TYPE_MAX);
92d927f850d4b668b44f3e5f41e266d934d03726Tom Gundersen assert(size >= sizeof(ObjectHeader));
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen assert(offset);
1c25683e0f40c6169676cc44fa1897082597feecTom Gundersen assert(ret);
1c25683e0f40c6169676cc44fa1897082597feecTom Gundersen
a669ea9860900d5cdebbc4cb9aaea72db7e28a02Tom Gundersen p = le64toh(f->header->tail_object_offset);
1c25683e0f40c6169676cc44fa1897082597feecTom Gundersen if (p == 0)
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen p = le64toh(f->header->header_size);
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen else {
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen r = journal_file_move_to_object(f, -1, p, &tail);
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen if (r < 0)
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen return r;
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen p += ALIGN64(le64toh(tail->object.size));
1c25683e0f40c6169676cc44fa1897082597feecTom Gundersen }
1c25683e0f40c6169676cc44fa1897082597feecTom Gundersen
a669ea9860900d5cdebbc4cb9aaea72db7e28a02Tom Gundersen r = journal_file_allocate(f, p, size);
1c25683e0f40c6169676cc44fa1897082597feecTom Gundersen if (r < 0)
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen return r;
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen r = journal_file_move_to(f, type, false, p, size, &t);
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen if (r < 0)
66d3752e812915a549ebee01769ee761c1495667Jacob Keller return r;
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen
5fde13d748749f0e06e2e6cdd15f0980a79ea82cTom Gundersen o = (Object*) t;
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen
16b9b87aeee9353b5b8dae6089a69752422a5b09Tom Gundersen zero(o->object);
dab495dc23bf9a5ba0487a057bb594355555a0e9Tom Gundersen o->object.type = type;
f647962d64e844689f3e2acfce6102fc47e76df2Michal Schmidt o->object.size = htole64(size);
f647962d64e844689f3e2acfce6102fc47e76df2Michal Schmidt
43b3a5ef61859f06cdbaf26765cab8e1adac4296Tom Gundersen f->header->tail_object_offset = htole64(p);
d95b83b87d7d7c50e550f7128827f73a321c8934Tom Gundersen f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
d95b83b87d7d7c50e550f7128827f73a321c8934Tom Gundersen
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen *ret = o;
af6f0d422c521374ee6a2dd92df5935a5a476ae5Tom Gundersen *offset = p;
be32eb9b7fbcb22e4b648086d644135e38279633Tom Gundersen
847a8a5fed4d265dfa659917515c6f9bd1b8d5c4Tom Gundersen return 0;
847a8a5fed4d265dfa659917515c6f9bd1b8d5c4Tom Gundersen}
a7f7d1bde43fc825c49afea3f946f5b4b3d563e0Harald Hoyer
847a8a5fed4d265dfa659917515c6f9bd1b8d5c4Tom Gundersenstatic int journal_file_setup_data_hash_table(JournalFile *f) {
847a8a5fed4d265dfa659917515c6f9bd1b8d5c4Tom Gundersen uint64_t s, p;
847a8a5fed4d265dfa659917515c6f9bd1b8d5c4Tom Gundersen Object *o;
847a8a5fed4d265dfa659917515c6f9bd1b8d5c4Tom Gundersen int r;
847a8a5fed4d265dfa659917515c6f9bd1b8d5c4Tom Gundersen
847a8a5fed4d265dfa659917515c6f9bd1b8d5c4Tom Gundersen assert(f);
aedca89268ed4fd6be41e55a605f011033ad1fb5Tom Gundersen
847a8a5fed4d265dfa659917515c6f9bd1b8d5c4Tom Gundersen /* We estimate that we need 1 hash table entry per 768 of
847a8a5fed4d265dfa659917515c6f9bd1b8d5c4Tom Gundersen journal file and we want to make sure we never get beyond
847a8a5fed4d265dfa659917515c6f9bd1b8d5c4Tom Gundersen 75% fill level. Calculate the hash table size for the
847a8a5fed4d265dfa659917515c6f9bd1b8d5c4Tom Gundersen maximum file size based on these metrics. */
847a8a5fed4d265dfa659917515c6f9bd1b8d5c4Tom Gundersen
847a8a5fed4d265dfa659917515c6f9bd1b8d5c4Tom Gundersen s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
847a8a5fed4d265dfa659917515c6f9bd1b8d5c4Tom Gundersen if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
2c5859afecee81e345fc9526b1083bf79990ffb8Daniel Mack s = DEFAULT_DATA_HASH_TABLE_SIZE;
be32eb9b7fbcb22e4b648086d644135e38279633Tom Gundersen
66d3752e812915a549ebee01769ee761c1495667Jacob Keller log_debug("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
66d3752e812915a549ebee01769ee761c1495667Jacob Keller
be32eb9b7fbcb22e4b648086d644135e38279633Tom Gundersen r = journal_file_append_object(f,
be32eb9b7fbcb22e4b648086d644135e38279633Tom Gundersen OBJECT_DATA_HASH_TABLE,
be32eb9b7fbcb22e4b648086d644135e38279633Tom Gundersen offsetof(Object, hash_table.items) + s,
464cf22f17e0cf2d8bfa6d72b5e7a662d634f149Tom Gundersen &o, &p);
464cf22f17e0cf2d8bfa6d72b5e7a662d634f149Tom Gundersen if (r < 0)
be32eb9b7fbcb22e4b648086d644135e38279633Tom Gundersen return r;
2c5859afecee81e345fc9526b1083bf79990ffb8Daniel Mack
04b67d49254d956d31bcfe80340fb9df7ed332d3Tom Gundersen memset(o->hash_table.items, 0, s);
e51660ae56bb747ece2cab8fe6eec37f4d06a438Tom Gundersen
be32eb9b7fbcb22e4b648086d644135e38279633Tom Gundersen f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
be32eb9b7fbcb22e4b648086d644135e38279633Tom Gundersen f->header->data_hash_table_size = htole64(s);
be32eb9b7fbcb22e4b648086d644135e38279633Tom Gundersen
be32eb9b7fbcb22e4b648086d644135e38279633Tom Gundersen return 0;
be32eb9b7fbcb22e4b648086d644135e38279633Tom Gundersen}
be32eb9b7fbcb22e4b648086d644135e38279633Tom Gundersen
/* Appends an OBJECT_FIELD_HASH_TABLE object of DEFAULT_FIELD_HASH_TABLE_SIZE
 * bytes of items, clears the items and records offset and size of the item
 * area in the file header. Returns 0 on success, or the negative error from
 * journal_file_append_object(). */
static int journal_file_setup_field_hash_table(JournalFile *f) {
        uint64_t table_bytes, obj_offset;
        Object *table;
        int r;

        assert(f);

        table_bytes = DEFAULT_FIELD_HASH_TABLE_SIZE;

        r = journal_file_append_object(
                        f,
                        OBJECT_FIELD_HASH_TABLE,
                        offsetof(Object, hash_table.items) + table_bytes,
                        &table, &obj_offset);
        if (r < 0)
                return r;

        memset(table->hash_table.items, 0, table_bytes);

        /* The header points at the item array, not at the object header */
        f->header->field_hash_table_offset = htole64(obj_offset + offsetof(Object, hash_table.items));
        f->header->field_hash_table_size = htole64(table_bytes);

        return 0;
}
/* Maps the data hash table item area described by the file header via
 * journal_file_move_to() and remembers the resulting pointer in
 * f->data_hash_table. Returns 0 on success, negative error otherwise. */
static int journal_file_map_data_hash_table(JournalFile *f) {
        uint64_t table_offset, table_bytes;
        void *mapped;
        int r;

        assert(f);

        table_offset = le64toh(f->header->data_hash_table_offset);
        table_bytes = le64toh(f->header->data_hash_table_size);

        r = journal_file_move_to(f, OBJECT_DATA_HASH_TABLE, true, table_offset, table_bytes, &mapped);
        if (r < 0)
                return r;

        f->data_hash_table = mapped;
        return 0;
}
/* Maps the field hash table item area described by the file header via
 * journal_file_move_to() and remembers the resulting pointer in
 * f->field_hash_table. Returns 0 on success, negative error otherwise. */
static int journal_file_map_field_hash_table(JournalFile *f) {
        uint64_t table_offset, table_bytes;
        void *mapped;
        int r;

        assert(f);

        table_offset = le64toh(f->header->field_hash_table_offset);
        table_bytes = le64toh(f->header->field_hash_table_size);

        r = journal_file_move_to(f, OBJECT_FIELD_HASH_TABLE, true, table_offset, table_bytes, &mapped);
        if (r < 0)
                return r;

        f->field_hash_table = mapped;
        return 0;
}
/* Hooks the data object 'o' located at 'offset' into the data hash table
 * bucket selected by 'hash'.
 *
 * The object's chain/entry bookkeeping fields are reset, then the object is
 * either made the head of an empty bucket or appended behind the bucket's
 * current tail object. The bucket's tail pointer and (if the header has the
 * field) n_data are updated.
 *
 * Returns 0 on success, -EBADMSG if the header describes a hash table
 * without a single bucket, or the negative error from
 * journal_file_move_to_object(). */
static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
        uint64_t p, h, m;
        int r;

        assert(f);
        assert(o);
        assert(offset > 0);
        assert(o->object.type == OBJECT_DATA);

        /* A table size smaller than one HashItem would make the modulo
         * below a division by zero, hence refuse such headers before
         * modifying anything. */
        m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
        if (m <= 0)
                return -EBADMSG;

        o->data.next_hash_offset = o->data.next_field_offset = 0;
        o->data.entry_offset = o->data.entry_array_offset = 0;
        o->data.n_entries = 0;

        h = hash % m;
        p = le64toh(f->data_hash_table[h].tail_hash_offset);
        if (p == 0) {
                /* Only entry in the hash table is easy */
                f->data_hash_table[h].head_hash_offset = htole64(offset);
        } else {
                /* Move back to the previous data object, to patch in
                 * pointer. This might alter the window we are looking
                 * at, hence 'o' refers to the previous object from
                 * here on. */
                r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
                if (r < 0)
                        return r;

                o->data.next_hash_offset = htole64(offset);
        }

        f->data_hash_table[h].tail_hash_offset = htole64(offset);

        if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
                f->header->n_data = htole64(le64toh(f->header->n_data) + 1);

        return 0;
}
/* Looks up a data object with the given payload, using a precomputed hash.
 *
 * Walks the chain of the hash table bucket selected by 'hash' and compares
 * every object with a matching stored hash against 'data'/'size'.
 * Compressed objects are decompressed into f->compress_buffer for the
 * comparison when built with HAVE_XZ; without it, hitting a compressed
 * object with matching hash yields -EPROTONOSUPPORT.
 *
 * Returns 1 if found (storing object and offset in *ret / *offset where
 * non-NULL), 0 if not found, -EBADMSG on a hash table without buckets or a
 * malformed compressed object, or the negative error from
 * journal_file_move_to_object(). */
int journal_file_find_data_object_with_hash(
                JournalFile *f,
                const void *data, uint64_t size, uint64_t hash,
                Object **ret, uint64_t *offset) {

        uint64_t p, osize, h, m;
        int r;

        assert(f);
        assert(data || size == 0);

        osize = offsetof(Object, data.payload) + size;

        /* Check the number of buckets rather than the byte size: any
         * size below sizeof(HashItem) would otherwise result in a modulo
         * by zero. */
        m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
        if (m <= 0)
                return -EBADMSG;

        h = hash % m;
        p = le64toh(f->data_hash_table[h].head_hash_offset);

        while (p > 0) {
                Object *o;

                r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
                if (r < 0)
                        return r;

                if (le64toh(o->data.hash) != hash)
                        goto next;

                if (o->object.flags & OBJECT_COMPRESSED) {
#ifdef HAVE_XZ
                        uint64_t l, rsize;

                        l = le64toh(o->object.size);
                        if (l <= offsetof(Object, data.payload))
                                return -EBADMSG;

                        l -= offsetof(Object, data.payload);

                        if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
                                return -EBADMSG;

                        if (rsize == size &&
                            memcmp(f->compress_buffer, data, size) == 0) {

                                if (ret)
                                        *ret = o;

                                if (offset)
                                        *offset = p;

                                return 1;
                        }
#else
                        return -EPROTONOSUPPORT;
#endif

                } else if (le64toh(o->object.size) == osize &&
                           memcmp(o->data.payload, data, size) == 0) {

                        if (ret)
                                *ret = o;

                        if (offset)
                                *offset = p;

                        return 1;
                }

        next:
                p = le64toh(o->data.next_hash_offset);
        }

        return 0;
}
/* Convenience wrapper: hashes 'data' with hash64() and forwards to
 * journal_file_find_data_object_with_hash(), returning its result. */
int journal_file_find_data_object(
                JournalFile *f,
                const void *data, uint64_t size,
                Object **ret, uint64_t *offset) {

        assert(f);
        assert(data || size == 0);

        return journal_file_find_data_object_with_hash(
                        f,
                        data, size, hash64(data, size),
                        ret, offset);
}
/* Makes sure a data object with the given payload exists in the file.
 *
 * If an identical object is already stored it is reused. Otherwise a new
 * OBJECT_DATA object is appended, filled (compressed when built with
 * HAVE_XZ, compression is enabled for the file and the payload reaches
 * COMPRESSION_SIZE_THRESHOLD), linked into the data hash table and, when
 * built with HAVE_GCRYPT, fed into the HMAC.
 *
 * Returns 0 on success and stores object and offset in *ret / *offset where
 * non-NULL; returns a negative error otherwise. */
static int journal_file_append_data(
                JournalFile *f,
                const void *data, uint64_t size,
                Object **ret, uint64_t *offset) {

        bool compressed = false;
        uint64_t hash, where, object_size;
        Object *obj;
        int r;

        assert(f);
        assert(data || size == 0);

        hash = hash64(data, size);

        r = journal_file_find_data_object_with_hash(f, data, size, hash, &obj, &where);
        if (r < 0)
                return r;
        if (r > 0)
                /* Already stored, simply hand out the existing object */
                goto finish;

        object_size = offsetof(Object, data.payload) + size;
        r = journal_file_append_object(f, OBJECT_DATA, object_size, &obj, &where);
        if (r < 0)
                return r;

        obj->data.hash = htole64(hash);

#ifdef HAVE_XZ
        if (f->compress &&
            size >= COMPRESSION_SIZE_THRESHOLD) {
                uint64_t rsize;

                compressed = compress_blob(data, size, obj->data.payload, &rsize);

                if (compressed) {
                        obj->object.size = htole64(offsetof(Object, data.payload) + rsize);
                        obj->object.flags |= OBJECT_COMPRESSED;

                        log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
                }
        }
#endif

        if (size > 0 && !compressed)
                memcpy(obj->data.payload, data, size);

        r = journal_file_link_data(f, obj, where, hash);
        if (r < 0)
                return r;

#ifdef HAVE_GCRYPT
        r = journal_file_hmac_put_object(f, OBJECT_DATA, where);
        if (r < 0)
                return r;
#endif

        /* Linking may have moved the mapping window, hence look the
         * object up again before handing it out */
        r = journal_file_move_to_object(f, OBJECT_DATA, where, &obj);
        if (r < 0)
                return r;

finish:
        if (ret)
                *ret = obj;

        if (offset)
                *offset = where;

        return 0;
}
/* Returns how many EntryItem slots fit into the entry object 'o', derived
 * from the object size stored in its header. */
uint64_t journal_file_entry_n_items(Object *o) {
        uint64_t payload;

        assert(o);
        assert(o->object.type == OBJECT_ENTRY);

        payload = le64toh(o->object.size) - offsetof(Object, entry.items);
        return payload / sizeof(EntryItem);
}
/* Returns how many 64bit offset slots fit into the entry array object 'o',
 * derived from the object size stored in its header. */
uint64_t journal_file_entry_array_n_items(Object *o) {
        uint64_t payload;

        assert(o);
        assert(o->object.type == OBJECT_ENTRY_ARRAY);

        payload = le64toh(o->object.size) - offsetof(Object, entry_array.items);
        return payload / sizeof(uint64_t);
}
/* Returns how many HashItem buckets fit into the (data or field) hash table
 * object 'o', derived from the object size stored in its header. */
uint64_t journal_file_hash_table_n_items(Object *o) {
        uint64_t payload;

        assert(o);
        assert(o->object.type == OBJECT_DATA_HASH_TABLE ||
               o->object.type == OBJECT_FIELD_HASH_TABLE);

        payload = le64toh(o->object.size) - offsetof(Object, hash_table.items);
        return payload / sizeof(HashItem);
}
/* Stores the entry offset 'p' at index *idx of the chain of entry arrays
 * that starts at *first, and increments *idx.
 *
 * If the index lies within an existing array of the chain the slot is
 * simply written. Otherwise a new entry array is appended to the file
 * (at least 4 slots, otherwise twice the size of the last array, or
 * (index+1)*2 if the index exceeds that size) and hooked in behind the
 * last array, or into *first if the chain was empty.
 *
 * Returns 0 on success, negative error otherwise. */
static int link_entry_into_array(JournalFile *f,
                                 le64_t *first,
                                 le64_t *idx,
                                 uint64_t p) {
        uint64_t capacity = 0, prev = 0, cur, slot, index, new_offset;
        Object *array;
        int r;

        assert(f);
        assert(first);
        assert(idx);
        assert(p > 0);

        index = le64toh(*idx);
        slot = index;

        for (cur = le64toh(*first); cur > 0; cur = le64toh(array->entry_array.next_entry_array_offset)) {

                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, cur, &array);
                if (r < 0)
                        return r;

                capacity = journal_file_entry_array_n_items(array);
                if (slot < capacity) {
                        /* There is room in an existing array */
                        array->entry_array.items[slot] = htole64(p);
                        *idx = htole64(index + 1);
                        return 0;
                }

                slot -= capacity;
                prev = cur;
        }

        /* No room left, allocate a bigger array and chain it up */
        capacity = index > capacity ? (index+1) * 2 : capacity * 2;
        if (capacity < 4)
                capacity = 4;

        r = journal_file_append_object(
                        f, OBJECT_ENTRY_ARRAY,
                        offsetof(Object, entry_array.items) + capacity * sizeof(uint64_t),
                        &array, &new_offset);
        if (r < 0)
                return r;

#ifdef HAVE_GCRYPT
        r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, new_offset);
        if (r < 0)
                return r;
#endif

        array->entry_array.items[slot] = htole64(p);

        if (prev != 0) {
                /* Patch the previous array so that it points to the new one */
                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, prev, &array);
                if (r < 0)
                        return r;

                array->entry_array.next_entry_array_offset = htole64(new_offset);
        } else
                *first = htole64(new_offset);

        if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
                f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);

        *idx = htole64(index + 1);

        return 0;
}
/* Like link_entry_into_array(), but for lists whose very first element is
 * stored inline in *extra and only the remaining ones in the array chain
 * at *first. *idx counts all elements, including the inline one, and is
 * incremented on success.
 *
 * Returns 0 on success, or the negative error from
 * link_entry_into_array(). */
static int link_entry_into_array_plus_one(JournalFile *f,
                                          le64_t *extra,
                                          le64_t *first,
                                          le64_t *idx,
                                          uint64_t p) {
        assert(f);
        assert(extra);
        assert(first);
        assert(idx);
        assert(p > 0);

        if (*idx != 0) {
                /* The array chain is indexed without the inline element,
                 * hence operate on a temporary index that is one lower */
                le64_t array_idx = htole64(le64toh(*idx) - 1);
                int r;

                r = link_entry_into_array(f, first, &array_idx, p);
                if (r < 0)
                        return r;
        } else
                *extra = htole64(p);

        *idx = htole64(le64toh(*idx) + 1);
        return 0;
}
/* Registers the entry at 'offset' with the data object referenced by item
 * 'i' of the entry object 'o'.
 *
 * Returns -EINVAL if the item carries no data object offset, otherwise the
 * result of looking up the data object and linking the entry into its
 * entry list. */
static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
        uint64_t data_offset;
        Object *data;
        int r;

        assert(f);
        assert(o);
        assert(offset > 0);

        data_offset = le64toh(o->entry.items[i].object_offset);
        if (data_offset == 0)
                return -EINVAL;

        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &data);
        if (r < 0)
                return r;

        return link_entry_into_array_plus_one(
                        f,
                        &data->data.entry_offset,
                        &data->data.entry_array_offset,
                        &data->data.n_entries,
                        offset);
}
/* Links the entry object 'o' located at 'offset' into the file: first into
 * the global entry array referenced by the header, then into the entry
 * list of every data object the entry refers to. The header's head/tail
 * timestamps are updated from the entry on the way.
 *
 * Returns 0 on success, or the first negative error encountered. */
static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
        uint64_t n_items, k;
        int r;

        assert(f);
        assert(o);
        assert(offset > 0);
        assert(o->object.type == OBJECT_ENTRY);

        __sync_synchronize();

        /* The entry itself goes into the global entry array */
        r = link_entry_into_array(
                        f,
                        &f->header->entry_array_offset,
                        &f->header->n_entries,
                        offset);
        if (r < 0)
                return r;

        if (f->header->head_entry_realtime == 0)
                f->header->head_entry_realtime = o->entry.realtime;

        f->header->tail_entry_realtime = o->entry.realtime;
        f->header->tail_entry_monotonic = o->entry.monotonic;

        f->tail_entry_monotonic_valid = true;

        /* Then hook it up with each of its data objects */
        n_items = journal_file_entry_n_items(o);
        k = 0;
        while (k < n_items) {
                r = journal_file_link_entry_item(f, o, offset, k);
                if (r < 0)
                        return r;

                k++;
        }

        return 0;
}
/* Appends an entry object referencing the given, already stored items.
 *
 * Allocates an OBJECT_ENTRY object large enough for 'n_items' items, fills
 * in sequence number, items, timestamps, xor hash and the header's boot ID,
 * feeds it into the HMAC (when built with HAVE_GCRYPT) and links it into
 * the file via journal_file_link_entry().
 *
 * 'items' may be NULL if 'n_items' is 0. On success returns 0 and stores
 * object and offset in *ret / *offset where non-NULL; otherwise returns a
 * negative error. */
static int journal_file_append_entry_internal(
                JournalFile *f,
                const dual_timestamp *ts,
                uint64_t xor_hash,
                const EntryItem items[], unsigned n_items,
                uint64_t *seqnum,
                Object **ret, uint64_t *offset) {
        uint64_t np;
        uint64_t osize;
        Object *o;
        int r;

        assert(f);
        assert(items || n_items == 0);
        assert(ts);

        osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));

        r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
        if (r < 0)
                return r;

        o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));

        /* memcpy() must not be handed a NULL source, not even for a
         * length of zero, hence skip it for empty entries. */
        if (n_items > 0)
                memcpy(o->entry.items, items, n_items * sizeof(EntryItem));

        o->entry.realtime = htole64(ts->realtime);
        o->entry.monotonic = htole64(ts->monotonic);
        o->entry.xor_hash = htole64(xor_hash);
        o->entry.boot_id = f->header->boot_id;

#ifdef HAVE_GCRYPT
        r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
        if (r < 0)
                return r;
#endif

        r = journal_file_link_entry(f, o, np);
        if (r < 0)
                return r;

        if (ret)
                *ret = o;

        if (offset)
                *offset = np;

        return 0;
}
/* Notifies inotify watchers that the journal file has changed. Failure to
 * do so is logged but otherwise ignored. */
void journal_file_post_change(JournalFile *f) {
        assert(f);

        /* inotify() does not receive IN_MODIFY events from file
         * accesses done via mmap(). After each access we hence
         * trigger IN_MODIFY by truncating the journal file to its
         * current size. */

        __sync_synchronize();

        if (ftruncate(f->fd, f->last_stat.st_size) < 0)
                log_error("Failed to truncate file to its own size: %m");
}
/* Appends a complete log entry made up of the given iovec fields.
 *
 * Every field is stored (or found) as data object, then an entry object
 * referencing all of them is appended. If 'ts' is NULL the current time is
 * used. Refuses with -EPERM on files not opened for writing and with
 * -EINVAL if the monotonic timestamp would go backwards relative to the
 * last entry written. journal_file_post_change() is invoked after the
 * entry object was attempted, regardless of the outcome.
 *
 * Returns 0 on success, negative error otherwise. */
int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
        struct dual_timestamp now;
        uint64_t xor_hash = 0;
        EntryItem *items;
        unsigned k;
        int r;

        assert(f);
        assert(iovec || n_iovec == 0);

        if (!f->writable)
                return -EPERM;

        if (!ts) {
                dual_timestamp_get(&now);
                ts = &now;
        }

        if (f->tail_entry_monotonic_valid &&
            ts->monotonic < le64toh(f->header->tail_entry_monotonic))
                return -EINVAL;

#ifdef HAVE_GCRYPT
        r = journal_file_maybe_append_tag(f, ts->realtime);
        if (r < 0)
                return r;
#endif

        /* alloca() can't take 0, hence let's allocate at least one */
        items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));

        k = 0;
        while (k < n_iovec) {
                uint64_t data_offset;
                Object *data;

                r = journal_file_append_data(f, iovec[k].iov_base, iovec[k].iov_len, &data, &data_offset);
                if (r < 0)
                        return r;

                xor_hash ^= le64toh(data->data.hash);
                items[k].object_offset = htole64(data_offset);
                items[k].hash = data->data.hash;

                k++;
        }

        r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);

        journal_file_post_change(f);

        return r;
}
/* Fetches the entry stored at index 'i' of the entry array chain starting
 * at offset 'first'.
 *
 * Returns 1 and stores entry object and offset in *ret / *offset (where
 * non-NULL) if the slot exists and is non-zero, 0 if the index lies beyond
 * the chain or the slot is empty, negative error otherwise. */
static int generic_array_get(JournalFile *f,
                             uint64_t first,
                             uint64_t i,
                             Object **ret, uint64_t *offset) {
        uint64_t entry_offset = 0, cur;
        Object *obj;
        int r;

        assert(f);

        for (cur = first; cur > 0; cur = le64toh(obj->entry_array.next_entry_array_offset)) {
                uint64_t capacity;

                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, cur, &obj);
                if (r < 0)
                        return r;

                capacity = journal_file_entry_array_n_items(obj);
                if (i < capacity) {
                        entry_offset = le64toh(obj->entry_array.items[i]);
                        break;
                }

                /* Not in this array, continue with the index relative
                 * to the next one */
                i -= capacity;
        }

        if (cur == 0 || entry_offset == 0)
                return 0;

        r = journal_file_move_to_object(f, OBJECT_ENTRY, entry_offset, &obj);
        if (r < 0)
                return r;

        if (ret)
                *ret = obj;

        if (offset)
                *offset = entry_offset;

        return 1;
}
/* Like generic_array_get(), but for lists whose element 0 is stored inline
 * at offset 'extra' while elements 1.. live in the array chain at 'first'.
 *
 * Returns 1 if the element was found, 0 if not, negative error otherwise. */
static int generic_array_get_plus_one(JournalFile *f,
                                      uint64_t extra,
                                      uint64_t first,
                                      uint64_t i,
                                      Object **ret, uint64_t *offset) {
        Object *obj;
        int r;

        assert(f);

        if (i > 0)
                /* Everything but the first element is in the chain */
                return generic_array_get(f, first, i-1, ret, offset);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &obj);
        if (r < 0)
                return r;

        if (ret)
                *ret = obj;

        if (offset)
                *offset = extra;

        return 1;
}
/* Results of the test_object callbacks used by the bisection helpers */
enum {
        TEST_FOUND = 0, /* the tested entry matches the needle */
        TEST_LEFT  = 1, /* the tested entry is smaller than the needle */
        TEST_RIGHT = 2  /* the tested entry is larger than the needle */
};
/* Bisects a chain of entry arrays for 'needle'.
 *
 * 'first' is the offset of the first entry array of the chain, 'n' the
 * total number of used slots in the chain. test_object() is called with
 * the offset stored in a slot and has to return TEST_FOUND, TEST_LEFT or
 * TEST_RIGHT (or a negative error).
 *
 * With DIRECTION_DOWN the first slot that tests FOUND or RIGHT is
 * selected, with DIRECTION_UP the last slot that tests FOUND or LEFT.
 *
 * Returns 1 if a slot was selected (storing the entry object, its offset
 * and its index within the whole chain in *ret, *offset and *idx where
 * non-NULL), 0 if there is none, -EBADMSG if an empty slot is encountered
 * among the used ones, or another negative error from the helpers. */
static int generic_array_bisect(JournalFile *f,
                                uint64_t first,
                                uint64_t n,
                                uint64_t needle,
                                int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
                                direction_t direction,
                                Object **ret,
                                uint64_t *offset,
                                uint64_t *idx) {

        uint64_t a, p, t = 0, i = 0, last_p = 0;
        bool subtract_one = false;
        Object *o, *array = NULL;
        int r;

        assert(f);
        assert(test_object);

        a = first;
        while (a > 0) {
                uint64_t left, right, k, lp;

                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
                if (r < 0)
                        return r;

                k = journal_file_entry_array_n_items(array);
                right = MIN(k, n);
                if (right <= 0)
                        return 0;

                /* Check the last used slot of this array first, to find
                 * out whether the needle can be in this array at all */
                i = right - 1;
                lp = p = le64toh(array->entry_array.items[i]);
                if (p <= 0)
                        return -EBADMSG;

                r = test_object(f, p, needle);
                if (r < 0)
                        return r;

                if (r == TEST_FOUND)
                        r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;

                if (r == TEST_RIGHT) {
                        left = 0;
                        right -= 1;

                        for (;;) {
                                if (left == right) {
                                        if (direction == DIRECTION_UP)
                                                subtract_one = true;

                                        i = left;
                                        goto found;
                                }

                                assert(left < right);

                                i = (left + right) / 2;
                                p = le64toh(array->entry_array.items[i]);
                                if (p <= 0)
                                        return -EBADMSG;

                                r = test_object(f, p, needle);
                                if (r < 0)
                                        return r;

                                if (r == TEST_FOUND)
                                        r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;

                                if (r == TEST_RIGHT)
                                        right = i;
                                else
                                        left = i + 1;
                        }
                }

                /* All used slots of this array are left of the needle. If
                 * this array also holds the last used slot of the chain
                 * (which includes the case where it is exactly full) there
                 * is nothing further to look at: going up the last slot
                 * is the result, going down there is none. */
                if (k >= n) {
                        if (direction == DIRECTION_UP) {
                                i = n;
                                subtract_one = true;
                                goto found;
                        }

                        return 0;
                }

                last_p = lp;

                n -= k;
                t += k;
                a = le64toh(array->entry_array.next_entry_array_offset);
        }

        return 0;

found:
        if (subtract_one && t == 0 && i == 0)
                return 0;

        /* When stepping back from the first slot of an array, the wanted
         * slot is the last one of the previous array */
        if (subtract_one && i == 0)
                p = last_p;
        else if (subtract_one)
                p = le64toh(array->entry_array.items[i-1]);
        else
                p = le64toh(array->entry_array.items[i]);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
        if (r < 0)
                return r;

        if (ret)
                *ret = o;

        if (offset)
                *offset = p;

        if (idx)
                *idx = t + i + (subtract_one ? -1 : 0);

        return 1;
}
/* Like generic_array_bisect(), but for lists whose element 0 is stored
 * inline at offset 'extra' while elements 1.. live in the array chain at
 * 'first'. 'n' counts all elements including the inline one.
 *
 * Returns 1 if an element was selected (with *idx, if non-NULL, counting
 * the inline element as index 0), 0 if there is none, negative error
 * otherwise. */
static int generic_array_bisect_plus_one(JournalFile *f,
                                         uint64_t extra,
                                         uint64_t first,
                                         uint64_t n,
                                         uint64_t needle,
                                         int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
                                         direction_t direction,
                                         Object **ret,
                                         uint64_t *offset,
                                         uint64_t *idx) {

        bool fall_back_to_extra = false;
        Object *obj;
        int r;

        assert(f);
        assert(test_object);

        if (n <= 0)
                return 0;

        /* Test the inline element before bisecting the array chain */
        r = test_object(f, extra, needle);
        if (r < 0)
                return r;

        if (r == TEST_FOUND)
                r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;

        if (r == TEST_RIGHT) {
                /* Everything else is even further right: going down the
                 * inline element is the result, going up there is none */
                if (direction != DIRECTION_DOWN)
                        return 0;

                goto found;
        }

        /* Going up, a match in the array chain takes precedence since
         * the last matching element is wanted. Only if the chain has
         * none the inline element is the result, hence remember that. */
        if (r == TEST_LEFT && direction == DIRECTION_UP)
                fall_back_to_extra = true;

        r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);

        if (r == 0 && fall_back_to_extra)
                goto found;

        /* Account for the inline element in the returned index */
        if (r > 0 && idx)
                (*idx) ++;

        return r;

found:
        r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &obj);
        if (r < 0)
                return r;

        if (ret)
                *ret = obj;

        if (offset)
                *offset = extra;

        if (idx)
                *idx = 0;

        return 1;
}
/* Bisection callback comparing the offset 'p' itself with 'needle'. */
static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
        assert(f);
        assert(p > 0);

        if (p < needle)
                return TEST_LEFT;

        if (p > needle)
                return TEST_RIGHT;

        return TEST_FOUND;
}
/* Seeks in the global entry array to the entry located at offset 'p', or
 * the closest one in the given direction. Returns the result of
 * generic_array_bisect(). */
int journal_file_move_to_entry_by_offset(
                JournalFile *f,
                uint64_t p,
                direction_t direction,
                Object **ret,
                uint64_t *offset) {

        uint64_t first, n_entries;

        first = le64toh(f->header->entry_array_offset);
        n_entries = le64toh(f->header->n_entries);

        return generic_array_bisect(f, first, n_entries, p, test_object_offset, direction, ret, offset, NULL);
}
/* Bisection callback comparing the sequence number of the entry at offset
 * 'p' with 'needle'. Returns a negative error if the entry cannot be
 * accessed. */
static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
        uint64_t value;
        Object *entry;
        int r;

        assert(f);
        assert(p > 0);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &entry);
        if (r < 0)
                return r;

        value = le64toh(entry->entry.seqnum);

        if (value < needle)
                return TEST_LEFT;

        if (value > needle)
                return TEST_RIGHT;

        return TEST_FOUND;
}
/* Seeks in the global entry array to the entry with the given sequence
 * number, or the closest one in the given direction. Returns the result of
 * generic_array_bisect(). */
int journal_file_move_to_entry_by_seqnum(
                JournalFile *f,
                uint64_t seqnum,
                direction_t direction,
                Object **ret,
                uint64_t *offset) {

        uint64_t first, n_entries;

        first = le64toh(f->header->entry_array_offset);
        n_entries = le64toh(f->header->n_entries);

        return generic_array_bisect(f, first, n_entries, seqnum, test_object_seqnum, direction, ret, offset, NULL);
}
/* Bisection callback comparing the realtime timestamp of the entry at
 * offset 'p' with 'needle'. Returns a negative error if the entry cannot
 * be accessed. */
static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
        uint64_t value;
        Object *entry;
        int r;

        assert(f);
        assert(p > 0);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &entry);
        if (r < 0)
                return r;

        value = le64toh(entry->entry.realtime);

        if (value < needle)
                return TEST_LEFT;

        if (value > needle)
                return TEST_RIGHT;

        return TEST_FOUND;
}
/* Seeks in the global entry array to the entry with the given realtime
 * timestamp, or the closest one in the given direction. Returns the result
 * of generic_array_bisect(). */
int journal_file_move_to_entry_by_realtime(
                JournalFile *f,
                uint64_t realtime,
                direction_t direction,
                Object **ret,
                uint64_t *offset) {

        uint64_t first, n_entries;

        first = le64toh(f->header->entry_array_offset);
        n_entries = le64toh(f->header->n_entries);

        return generic_array_bisect(f, first, n_entries, realtime, test_object_realtime, direction, ret, offset, NULL);
}
/* Bisection callback comparing the monotonic timestamp of the entry at
 * offset 'p' with 'needle'. Returns a negative error if the entry cannot
 * be accessed. */
static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
        uint64_t value;
        Object *entry;
        int r;

        assert(f);
        assert(p > 0);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &entry);
        if (r < 0)
                return r;

        value = le64toh(entry->entry.monotonic);

        if (value < needle)
                return TEST_LEFT;

        if (value > needle)
                return TEST_RIGHT;

        return TEST_FOUND;
}
/* Seeks to the entry of the given boot with the given monotonic timestamp,
 * or the closest one in the given direction.
 *
 * The entries of a boot are found via the entry list of the data object
 * "_BOOT_ID=<id>". Returns -ENOENT if the file has no such data object,
 * otherwise the result of generic_array_bisect_plus_one(). */
int journal_file_move_to_entry_by_monotonic(
                JournalFile *f,
                sd_id128_t boot_id,
                uint64_t monotonic,
                direction_t direction,
                Object **ret,
                uint64_t *offset) {

        char field[9+32+1] = "_BOOT_ID=";
        Object *boot;
        int r;

        assert(f);

        /* Append the formatted ID right behind the 9 characters of the prefix */
        sd_id128_to_string(boot_id, field + 9);

        r = journal_file_find_data_object(f, field, strlen(field), &boot, NULL);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOENT;

        return generic_array_bisect_plus_one(
                        f,
                        le64toh(boot->data.entry_offset),
                        le64toh(boot->data.entry_array_offset),
                        le64toh(boot->data.n_entries),
                        monotonic,
                        test_object_monotonic,
                        direction,
                        ret, offset, NULL);
}
/* Moves to the entry following (DIRECTION_DOWN) or preceding
 * (DIRECTION_UP) the entry 'o' located at offset 'p' in the global entry
 * array. If 'o' is NULL, moves to the first respectively last entry of
 * the file instead.
 *
 * Returns 1 if an entry was found, 0 if there is none, -EINVAL if 'o' is
 * not an entry object, or another negative error. */
int journal_file_next_entry(
                JournalFile *f,
                Object *o, uint64_t p,
                direction_t direction,
                Object **ret, uint64_t *offset) {

        uint64_t index, n_entries;
        int r;

        assert(f);
        assert(p > 0 || !o);

        n_entries = le64toh(f->header->n_entries);
        if (n_entries <= 0)
                return 0;

        if (o) {
                if (o->object.type != OBJECT_ENTRY)
                        return -EINVAL;

                /* Find the index of the current entry first */
                r = generic_array_bisect(
                                f,
                                le64toh(f->header->entry_array_offset),
                                le64toh(f->header->n_entries),
                                p,
                                test_object_offset,
                                DIRECTION_DOWN,
                                NULL, NULL,
                                &index);
                if (r <= 0)
                        return r;

                if (direction == DIRECTION_DOWN) {
                        if (index >= n_entries - 1)
                                return 0;

                        index++;
                } else {
                        if (index <= 0)
                                return 0;

                        index--;
                }
        } else
                index = direction == DIRECTION_DOWN ? 0 : n_entries - 1;

        /* And jump to it */
        return generic_array_get(
                        f,
                        le64toh(f->header->entry_array_offset),
                        index,
                        ret, offset);
}
/* Moves 'skip' entries forward (positive) or backward (negative) in the
 * global entry array, starting from the entry 'o' located at offset 'p'.
 * The resulting index is clamped to the first respectively last entry.
 *
 * Returns the result of generic_array_get() for the new index, 0 or a
 * negative error if the current entry cannot be located, -EINVAL if 'o'
 * is not an entry object, -EBADMSG if the header claims there are no
 * entries. */
int journal_file_skip_entry(
                JournalFile *f,
                Object *o, uint64_t p,
                int64_t skip,
                Object **ret, uint64_t *offset) {

        uint64_t i, n;
        int r;

        assert(f);
        assert(o);
        assert(p > 0);

        if (o->object.type != OBJECT_ENTRY)
                return -EINVAL;

        r = generic_array_bisect(f,
                                 le64toh(f->header->entry_array_offset),
                                 le64toh(f->header->n_entries),
                                 p,
                                 test_object_offset,
                                 DIRECTION_DOWN,
                                 NULL, NULL,
                                 &i);
        if (r <= 0)
                return r;

        /* Calculate new index */
        if (skip < 0) {
                /* Negate in the unsigned domain: -skip would overflow
                 * for INT64_MIN, which is undefined for signed types. */
                uint64_t back = 0 - (uint64_t) skip;

                if (back >= i)
                        i = 0;
                else
                        i = i - back;
        } else
                i += (uint64_t) skip;

        n = le64toh(f->header->n_entries);
        if (n <= 0)
                return -EBADMSG;

        if (i >= n)
                i = n-1;

        return generic_array_get(f,
                                 le64toh(f->header->entry_array_offset),
                                 i,
                                 ret, offset);
}
/* Like journal_file_next_entry(), but iterates only over the entries that
 * reference the data object at 'data_offset'.
 *
 * Returns 1 if an entry was found, 0 if there is none, -EINVAL if 'o' is
 * not an entry object, or another negative error. */
int journal_file_next_entry_for_data(
                JournalFile *f,
                Object *o, uint64_t p,
                uint64_t data_offset,
                direction_t direction,
                Object **ret, uint64_t *offset) {

        uint64_t n_entries, index;
        Object *data;
        int r;

        assert(f);
        assert(p > 0 || !o);

        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &data);
        if (r < 0)
                return r;

        n_entries = le64toh(data->data.n_entries);
        if (n_entries <= 0)
                return 0;

        if (o) {
                if (o->object.type != OBJECT_ENTRY)
                        return -EINVAL;

                /* Find the index of the current entry in the data
                 * object's entry list first */
                r = generic_array_bisect_plus_one(
                                f,
                                le64toh(data->data.entry_offset),
                                le64toh(data->data.entry_array_offset),
                                le64toh(data->data.n_entries),
                                p,
                                test_object_offset,
                                DIRECTION_DOWN,
                                NULL, NULL,
                                &index);
                if (r <= 0)
                        return r;

                if (direction == DIRECTION_DOWN) {
                        if (index >= n_entries - 1)
                                return 0;

                        index++;
                } else {
                        if (index <= 0)
                                return 0;

                        index--;
                }
        } else
                index = direction == DIRECTION_DOWN ? 0 : n_entries - 1;

        return generic_array_get_plus_one(
                        f,
                        le64toh(data->data.entry_offset),
                        le64toh(data->data.entry_array_offset),
                        index,
                        ret, offset);
}
/* Seeks, among the entries referencing the data object at 'data_offset',
 * to the entry located at offset 'p' or the closest one in the given
 * direction. Returns a negative error if the data object cannot be
 * accessed, otherwise the result of generic_array_bisect_plus_one(). */
int journal_file_move_to_entry_by_offset_for_data(
                JournalFile *f,
                uint64_t data_offset,
                uint64_t p,
                direction_t direction,
                Object **ret, uint64_t *offset) {

        Object *data;
        int r;

        assert(f);

        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &data);
        if (r < 0)
                return r;

        return generic_array_bisect_plus_one(
                        f,
                        le64toh(data->data.entry_offset),
                        le64toh(data->data.entry_array_offset),
                        le64toh(data->data.n_entries),
                        p,
                        test_object_offset,
                        direction,
                        ret, offset, NULL);
}
/* Seeks, among the entries referencing the data object at 'data_offset',
 * to the entry of the given boot with the given monotonic timestamp, or
 * the closest one in the given direction.
 *
 * Works on two entry lists: the one of the "_BOOT_ID=<id>" data object and
 * the one of the data object at 'data_offset'. After an initial seek by
 * time in the former, both lists are bisected by offset in turns until an
 * entry is reached that both contain.
 *
 * Returns 1 if such an entry was found, 0 if there is none, -ENOENT if the
 * file has no data object for the boot ID, or another negative error. */
int journal_file_move_to_entry_by_monotonic_for_data(
                JournalFile *f,
                uint64_t data_offset,
                sd_id128_t boot_id,
                uint64_t monotonic,
                direction_t direction,
                Object **ret, uint64_t *offset) {

        char field[9+32+1] = "_BOOT_ID=";
        uint64_t boot_offset, candidate;
        Object *boot, *data;
        int r;

        assert(f);

        /* First, seek by time */
        sd_id128_to_string(boot_id, field + 9);
        r = journal_file_find_data_object(f, field, strlen(field), &boot, &boot_offset);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOENT;

        r = generic_array_bisect_plus_one(
                        f,
                        le64toh(boot->data.entry_offset),
                        le64toh(boot->data.entry_array_offset),
                        le64toh(boot->data.n_entries),
                        monotonic,
                        test_object_monotonic,
                        direction,
                        NULL, &candidate, NULL);
        if (r <= 0)
                return r;

        /* And now, continue seeking until we find an entry that
         * exists in both bisection arrays */

        for (;;) {
                uint64_t in_data, in_boot;
                Object *entry;

                /* Closest entry of the requested data object ... */
                r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &data);
                if (r < 0)
                        return r;

                r = generic_array_bisect_plus_one(
                                f,
                                le64toh(data->data.entry_offset),
                                le64toh(data->data.entry_array_offset),
                                le64toh(data->data.n_entries),
                                candidate,
                                test_object_offset,
                                direction,
                                NULL, &in_data, NULL);
                if (r <= 0)
                        return r;

                /* ... and the closest entry of the boot from there */
                r = journal_file_move_to_object(f, OBJECT_DATA, boot_offset, &boot);
                if (r < 0)
                        return r;

                r = generic_array_bisect_plus_one(
                                f,
                                le64toh(boot->data.entry_offset),
                                le64toh(boot->data.entry_array_offset),
                                le64toh(boot->data.n_entries),
                                in_data,
                                test_object_offset,
                                direction,
                                &entry, &in_boot, NULL);
                if (r <= 0)
                        return r;

                if (in_data == in_boot) {
                        if (ret)
                                *ret = entry;

                        if (offset)
                                *offset = in_boot;

                        return 1;
                }

                candidate = in_boot;
        }

        return 0;
}
/* Seeks, among the entries referencing the data object at 'data_offset',
 * to the entry with the given sequence number or the closest one in the
 * given direction. Returns a negative error if the data object cannot be
 * accessed, otherwise the result of generic_array_bisect_plus_one(). */
int journal_file_move_to_entry_by_seqnum_for_data(
                JournalFile *f,
                uint64_t data_offset,
                uint64_t seqnum,
                direction_t direction,
                Object **ret, uint64_t *offset) {

        Object *data;
        int r;

        assert(f);

        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &data);
        if (r < 0)
                return r;

        return generic_array_bisect_plus_one(
                        f,
                        le64toh(data->data.entry_offset),
                        le64toh(data->data.entry_array_offset),
                        le64toh(data->data.n_entries),
                        seqnum,
                        test_object_seqnum,
                        direction,
                        ret, offset, NULL);
}
/* Seeks, among the entries referencing the data object at 'data_offset',
 * to the entry with the given realtime timestamp or the closest one in the
 * given direction. Returns a negative error if the data object cannot be
 * accessed, otherwise the result of generic_array_bisect_plus_one(). */
int journal_file_move_to_entry_by_realtime_for_data(
                JournalFile *f,
                uint64_t data_offset,
                uint64_t realtime,
                direction_t direction,
                Object **ret, uint64_t *offset) {

        Object *data;
        int r;

        assert(f);

        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &data);
        if (r < 0)
                return r;

        return generic_array_bisect_plus_one(
                        f,
                        le64toh(data->data.entry_offset),
                        le64toh(data->data.entry_array_offset),
                        le64toh(data->data.n_entries),
                        realtime,
                        test_object_realtime,
                        direction,
                        ret, offset, NULL);
}
/* Debugging helper: prints the file header via
 * journal_file_print_header() and then one "Type:" line per object
 * (plus a "Flags:" line for compressed objects) to stdout.
 *
 * The walk starts at the offset given by the header's header_size
 * field and advances by the 64bit-aligned size of each object until
 * the object at tail_object_offset has been printed. If an object
 * cannot be loaded, or the walk would not move forward, "File corrupt"
 * is logged and the dump stops. */
void journal_file_dump(JournalFile *f) {
        Object *o;
        int r;
        uint64_t p;

        assert(f);

        journal_file_print_header(f);

        p = le64toh(f->header->header_size);
        while (p != 0) {
                uint64_t next;

                r = journal_file_move_to_object(f, -1, p, &o);
                if (r < 0)
                        goto fail;

                switch (o->object.type) {

                case OBJECT_UNUSED:
                        printf("Type: OBJECT_UNUSED\n");
                        break;

                case OBJECT_DATA:
                        printf("Type: OBJECT_DATA\n");
                        break;

                case OBJECT_ENTRY:
                        printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
                               (unsigned long long) le64toh(o->entry.seqnum),
                               (unsigned long long) le64toh(o->entry.monotonic),
                               (unsigned long long) le64toh(o->entry.realtime));
                        break;

                case OBJECT_FIELD_HASH_TABLE:
                        printf("Type: OBJECT_FIELD_HASH_TABLE\n");
                        break;

                case OBJECT_DATA_HASH_TABLE:
                        printf("Type: OBJECT_DATA_HASH_TABLE\n");
                        break;

                case OBJECT_ENTRY_ARRAY:
                        printf("Type: OBJECT_ENTRY_ARRAY\n");
                        break;

                case OBJECT_TAG:
                        printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
                               (unsigned long long) le64toh(o->tag.seqnum),
                               (unsigned long long) le64toh(o->tag.epoch));
                        break;

                default:
                        /* The type comes straight from the file; always
                         * emit a "Type:" line so that a following
                         * "Flags:" line is not attributed to the
                         * previous object. */
                        printf("Type: unknown (%u)\n", (unsigned) o->object.type);
                        break;
                }

                if (o->object.flags & OBJECT_COMPRESSED)
                        printf("Flags: COMPRESSED\n");

                /* The tail object is the last one to print */
                if (p == le64toh(f->header->tail_object_offset))
                        break;

                /* Refuse to continue if the size read from the file
                 * would leave us at the same offset (or wrap around),
                 * since that would loop forever or end silently. */
                next = p + ALIGN64(le64toh(o->object.size));
                if (next <= p)
                        goto fail;

                p = next;
        }

        return;

fail:
        log_error("File corrupt");
}
/* Prints a human-readable summary of the journal file header to
 * stdout: path, the four 128bit IDs, state, feature flags, sizes,
 * head/tail sequence numbers and realtime timestamps, object counters
 * and (if fstat() succeeds) the disk usage.
 *
 * Counters that only exist in newer header layouts are printed only
 * when JOURNAL_HEADER_CONTAINS() reports them as present. */
void journal_file_print_header(JournalFile *f) {
        /* One buffer per ID: all four strings are consumed by a single
         * printf() call, so they must not share storage. */
        char a[33], b[33], c[33], d[33];
        char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
        struct stat st;
        char bytes[FORMAT_BYTES_MAX];

        assert(f);

        printf("File Path: %s\n"
               "File ID: %s\n"
               "Machine ID: %s\n"
               "Boot ID: %s\n"
               "Sequential Number ID: %s\n"
               "State: %s\n"
               "Compatible Flags:%s%s\n"
               "Incompatible Flags:%s%s\n"
               "Header size: %llu\n"
               "Arena size: %llu\n"
               "Data Hash Table Size: %llu\n"
               "Field Hash Table Size: %llu\n"
               "Rotate Suggested: %s\n"
               "Head Sequential Number: %llu\n"
               "Tail Sequential Number: %llu\n"
               "Head Realtime Timestamp: %s\n"
               "Tail Realtime Timestamp: %s\n"
               "Objects: %llu\n"
               "Entry Objects: %llu\n",
               f->path,
               sd_id128_to_string(f->header->file_id, a),
               sd_id128_to_string(f->header->machine_id, b),
               sd_id128_to_string(f->header->boot_id, c),
               sd_id128_to_string(f->header->seqnum_id, d),
               f->header->state == STATE_OFFLINE ? "OFFLINE" :
               f->header->state == STATE_ONLINE ? "ONLINE" :
               f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
               JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
               /* Any bit besides the known one is shown as " ???" */
               (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
               JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
               (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
               (unsigned long long) le64toh(f->header->header_size),
               (unsigned long long) le64toh(f->header->arena_size),
               /* Hash table sizes are stored in bytes, shown in items */
               (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
               (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
               yes_no(journal_file_rotate_suggested(f)),
               (unsigned long long) le64toh(f->header->head_entry_seqnum),
               (unsigned long long) le64toh(f->header->tail_entry_seqnum),
               format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
               format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
               (unsigned long long) le64toh(f->header->n_objects),
               (unsigned long long) le64toh(f->header->n_entries));

        if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
                printf("Data Objects: %llu\n"
                       "Data Hash Table Fill: %.1f%%\n",
                       (unsigned long long) le64toh(f->header->n_data),
                       100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));

        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
                printf("Field Objects: %llu\n"
                       "Field Hash Table Fill: %.1f%%\n",
                       (unsigned long long) le64toh(f->header->n_fields),
                       100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));

        if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
                printf("Tag Objects: %llu\n",
                       (unsigned long long) le64toh(f->header->n_tags));

        if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
                printf("Entry Array Objects: %llu\n",
                       (unsigned long long) le64toh(f->header->n_entry_arrays));

        /* st_blocks is scaled by 512 here to obtain a byte count */
        if (fstat(f->fd, &st) >= 0)
                printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
}
/* Opens (and, for an empty writable file, initializes) a journal file.
 *
 * fname must end in ".journal" or ".journal~" and flags must request
 * O_RDONLY or O_RDWR access, otherwise -EINVAL is returned. If
 * mmap_cache is non-NULL a reference to it is taken, otherwise a new
 * cache is created. metrics, if given, is normalized in place by
 * journal_default_metrics() and copied into the file object; if it is
 * NULL the metrics of template (if any) are copied instead. template
 * is also passed to journal_file_init_header() for new files.
 *
 * On success returns 0 and, if ret is non-NULL, stores the new object
 * in *ret. On failure the partially set up object is released with
 * journal_file_close() and a negative errno-style value is returned.
 *
 * NOTE(review): when ret is NULL the successfully opened object is
 * neither returned nor closed here -- confirm that no caller passes
 * NULL. */
int journal_file_open(
const char *fname,
int flags,
mode_t mode,
bool compress,
bool seal,
JournalMetrics *metrics,
MMapCache *mmap_cache,
JournalFile *template,
JournalFile **ret) {
JournalFile *f;
int r;
bool newly_created = false;
assert(fname);
/* Only plain read-only or read-write access is supported */
if ((flags & O_ACCMODE) != O_RDONLY &&
(flags & O_ACCMODE) != O_RDWR)
return -EINVAL;
if (!endswith(fname, ".journal") &&
!endswith(fname, ".journal~"))
return -EINVAL;
f = new0(JournalFile, 1);
if (!f)
return -ENOMEM;
/* Mark the fd as not yet opened before anything can jump to fail */
f->fd = -1;
f->mode = mode;
f->flags = flags;
f->prot = prot_from_flags(flags);
f->writable = (flags & O_ACCMODE) != O_RDONLY;
#ifdef HAVE_XZ
f->compress = compress;
#endif
f->seal = seal;
/* Share the caller's mmap cache if there is one, else create our own */
if (mmap_cache)
f->mmap = mmap_cache_ref(mmap_cache);
else {
f->mmap = mmap_cache_new();
if (!f->mmap) {
r = -ENOMEM;
goto fail;
}
}
f->path = strdup(fname);
if (!f->path) {
r = -ENOMEM;
goto fail;
}
f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
if (f->fd < 0) {
r = -errno;
goto fail;
}
if (fstat(f->fd, &f->last_stat) < 0) {
r = -errno;
goto fail;
}
/* An empty file opened for writing is treated as freshly created
 * and gets a header written to it */
if (f->last_stat.st_size == 0 && f->writable) {
newly_created = true;
#ifdef HAVE_GCRYPT
/* Try to load the FSPRG state, and if we can't, then
 * just don't do sealing */
r = journal_file_fss_load(f);
if (r < 0)
f->seal = false;
#endif
r = journal_file_init_header(f, template);
if (r < 0)
goto fail;
/* Refresh the cached stat data after initializing the header */
if (fstat(f->fd, &f->last_stat) < 0) {
r = -errno;
goto fail;
}
}
/* Anything shorter than the minimal header cannot be a journal file */
if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
r = -EIO;
goto fail;
}
f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
if (f->header == MAP_FAILED) {
/* Reset to NULL so that the fail path does not see MAP_FAILED */
f->header = NULL;
r = -errno;
goto fail;
}
/* Headers we just wrote ourselves are not verified again */
if (!newly_created) {
r = journal_file_verify_header(f);
if (r < 0)
goto fail;
}
#ifdef HAVE_GCRYPT
/* For existing writable files a failure to load the FSPRG state is
 * fatal, unlike in the newly created case above */
if (!newly_created && f->writable) {
r = journal_file_fss_load(f);
if (r < 0)
goto fail;
}
#endif
if (f->writable) {
/* Explicit metrics take precedence over the template's */
if (metrics) {
journal_default_metrics(metrics, f->fd);
f->metrics = *metrics;
} else if (template)
f->metrics = template->metrics;
r = journal_file_refresh_header(f);
if (r < 0)
goto fail;
}
#ifdef HAVE_GCRYPT
r = journal_file_hmac_setup(f);
if (r < 0)
goto fail;
#endif
/* New files need their hash tables (and, with gcrypt, the first
 * tag) created before the tables can be mapped below */
if (newly_created) {
r = journal_file_setup_field_hash_table(f);
if (r < 0)
goto fail;
r = journal_file_setup_data_hash_table(f);
if (r < 0)
goto fail;
#ifdef HAVE_GCRYPT
r = journal_file_append_first_tag(f);
if (r < 0)
goto fail;
#endif
}
r = journal_file_map_field_hash_table(f);
if (r < 0)
goto fail;
r = journal_file_map_data_hash_table(f);
if (r < 0)
goto fail;
if (ret)
*ret = f;
return 0;
fail:
/* Single cleanup path for every error after the allocation of f */
journal_file_close(f);
return r;
}
/* Rotates a writable journal file: renames the current file to
 * "<name>@<seqnum_id>-<tail seqnum>-<tail realtime>.journal", marks
 * its header as archived, and opens a fresh file under the original
 * path, using the old file as template and sharing its mmap cache.
 *
 * Returns -EINVAL if the file is not writable or its path does not end
 * in ".journal", -ENOMEM on allocation failure, and -errno if the
 * rename fails; in these cases *f is left untouched. Once the rename
 * succeeded the old file is always closed and *f is replaced by the
 * new file, which is NULL if opening it failed; the result of
 * journal_file_open() is returned. */
int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
        char *p;
        size_t l;
        JournalFile *old_file, *new_file = NULL;
        int r;

        assert(f);
        assert(*f);

        old_file = *f;

        if (!old_file->writable)
                return -EINVAL;

        if (!endswith(old_file->path, ".journal"))
                return -EINVAL;

        l = strlen(old_file->path);

        /* Room for the old path plus "@" + 32 hex chars + "-" + 16 hex
         * chars + "-" + 16 hex chars + NUL; the ".journal" suffix is
         * reused from the old path's length. */
        p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
        if (!p)
                return -ENOMEM;

        /* Copy everything but the 8 character ".journal" suffix */
        memcpy(p, old_file->path, l - 8);
        p[l-8] = '@';
        sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
        snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
                 "-%016llx-%016llx.journal",
                 (unsigned long long) le64toh(old_file->header->tail_entry_seqnum),
                 (unsigned long long) le64toh(old_file->header->tail_entry_realtime));

        /* Capture the error before free(), which is not guaranteed to
         * leave errno untouched. */
        r = rename(old_file->path, p) < 0 ? -errno : 0;
        free(p);

        if (r < 0)
                return r;

        old_file->header->state = STATE_ARCHIVED;

        r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
        journal_file_close(old_file);

        *f = new_file;
        return r;
}
/* Like journal_file_open(), but if the open fails with one of a fixed
 * set of errors (see the list below) and the file was to be opened
 * writable with O_CREAT under a ".journal" name, the existing file is
 * renamed to "<name>@<realtime>-<random>.journal~" and the open is
 * retried exactly once.
 *
 * Returns the result of the (first or second) journal_file_open()
 * call, -ENOMEM if the backup name cannot be allocated, or -errno if
 * the rename fails. */
int journal_file_open_reliably(
                const char *fname,
                int flags,
                mode_t mode,
                bool compress,
                bool seal,
                JournalMetrics *metrics,
                MMapCache *mmap_cache,
                JournalFile *template,
                JournalFile **ret) {

        int r;
        size_t l;
        char *p;

        r = journal_file_open(fname, flags, mode, compress, seal,
                              metrics, mmap_cache, template, ret);
        if (r != -EBADMSG && /* corrupted */
            r != -ENODATA && /* truncated */
            r != -EHOSTDOWN && /* other machine */
            r != -EPROTONOSUPPORT && /* incompatible feature */
            r != -EBUSY && /* unclean shutdown */
            r != -ESHUTDOWN /* already archived */)
                return r;

        /* Only replace files we would have been allowed to create */
        if ((flags & O_ACCMODE) == O_RDONLY)
                return r;

        if (!(flags & O_CREAT))
                return r;

        if (!endswith(fname, ".journal"))
                return r;

        /* The file is corrupted. Rotate it away and try it again (but only once) */

        l = strlen(fname);
        if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
                     (int) (l-8), fname,
                     (unsigned long long) now(CLOCK_REALTIME),
                     random_ull()) < 0)
                return -ENOMEM;

        /* Capture the error before free(), which is not guaranteed to
         * leave errno untouched. */
        r = rename(fname, p) < 0 ? -errno : 0;
        free(p);

        if (r < 0)
                return r;

        log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);

        return journal_file_open(fname, flags, mode, compress, seal,
                                 metrics, mmap_cache, template, ret);
}
/* Copies the entry object o, located at offset p in journal file
 * "from", into journal file "to": every data object the entry refers
 * to is appended to "to" (uncompressed first if necessary), and then a
 * new entry referencing the appended data is written with the original
 * monotonic/realtime timestamps.
 *
 * seqnum, ret and offset are handed through unchanged to
 * journal_file_append_entry_internal(), whose result is returned.
 *
 * Returns -EPERM if "to" is not writable, -EINVAL if the entry's
 * monotonic timestamp is older than the valid tail timestamp of "to",
 * -EBADMSG on a hash mismatch or failed decompression, -E2BIG if a
 * payload size does not fit into size_t, -EPROTONOSUPPORT for
 * compressed payloads when built without XZ support, or any error
 * returned by the helpers called. */
int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
uint64_t i, n;
uint64_t q, xor_hash = 0;
int r;
EntryItem *items;
dual_timestamp ts;
assert(from);
assert(to);
assert(o);
assert(p);
if (!to->writable)
return -EPERM;
/* Read the timestamps now, o is re-pointed inside the loop below */
ts.monotonic = le64toh(o->entry.monotonic);
ts.realtime = le64toh(o->entry.realtime);
/* Refuse entries that would go backwards in monotonic time */
if (to->tail_entry_monotonic_valid &&
ts.monotonic < le64toh(to->header->tail_entry_monotonic))
return -EINVAL;
n = journal_file_entry_n_items(o);
/* NOTE(review): the stack allocation is sized by n as derived from
 * the source entry object and is not bounded here -- confirm that n
 * is validated elsewhere. */
items = alloca(sizeof(EntryItem) * n);
for (i = 0; i < n; i++) {
uint64_t l, h;
le64_t le_hash;
size_t t;
void *data;
Object *u;
/* Remember offset and hash of the item before o is replaced by
 * the data object it refers to */
q = le64toh(o->entry.items[i].object_offset);
le_hash = o->entry.items[i].hash;
r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
if (r < 0)
return r;
/* The hash stored in the entry item must match the data object */
if (le_hash != o->data.hash)
return -EBADMSG;
/* Payload length is the object size minus the fixed data header */
l = le64toh(o->object.size) - offsetof(Object, data.payload);
t = (size_t) l;
/* We hit the limit on 32bit machines */
if ((uint64_t) t != l)
return -E2BIG;
if (o->object.flags & OBJECT_COMPRESSED) {
#ifdef HAVE_XZ
uint64_t rsize;
/* Decompress into the per-file scratch buffer of "from" */
if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
return -EBADMSG;
data = from->compress_buffer;
l = rsize;
#else
return -EPROTONOSUPPORT;
#endif
} else
data = o->data.payload;
r = journal_file_append_data(to, data, l, &u, &h);
if (r < 0)
return r;
/* Accumulate the XOR of the hashes of the appended data objects */
xor_hash ^= le64toh(u->data.hash);
items[i].object_offset = htole64(h);
items[i].hash = u->data.hash;
/* Point o back at the source entry for the next iteration */
r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
if (r < 0)
return r;
}
return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
}
/* Normalizes the size limits in *m in place. A field set to
 * (uint64_t) -1 is replaced by a default, derived from the size of the
 * file system that fd lives on when fstatvfs() succeeds; explicitly
 * set max_use/max_size/min_size values are page-aligned and raised to
 * their minimum. The limits are then made consistent with each other
 * (max_use >= 2 * max_size, max_size >= min_size) and the result is
 * logged at debug level. */
void journal_default_metrics(JournalMetrics *m, int fd) {
        char s_use[FORMAT_BYTES_MAX], s_max[FORMAT_BYTES_MAX], s_min[FORMAT_BYTES_MAX], s_free[FORMAT_BYTES_MAX];
        struct statvfs vfs;
        uint64_t fs_size;

        assert(m);
        assert(fd >= 0);

        /* A size of 0 means "unknown" below */
        fs_size = 0;
        if (fstatvfs(fd, &vfs) >= 0)
                fs_size = vfs.f_frsize * vfs.f_blocks;

        /* max_use */
        if (m->max_use != (uint64_t) -1) {
                m->max_use = PAGE_ALIGN(m->max_use);

                if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
                        m->max_use = JOURNAL_FILE_SIZE_MIN*2;

        } else if (fs_size == 0)
                m->max_use = DEFAULT_MAX_USE_LOWER;
        else {
                m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */

                if (m->max_use > DEFAULT_MAX_USE_UPPER)
                        m->max_use = DEFAULT_MAX_USE_UPPER;

                if (m->max_use < DEFAULT_MAX_USE_LOWER)
                        m->max_use = DEFAULT_MAX_USE_LOWER;
        }

        /* max_size */
        if (m->max_size != (uint64_t) -1)
                m->max_size = PAGE_ALIGN(m->max_size);
        else {
                m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */

                if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
                        m->max_size = DEFAULT_MAX_SIZE_UPPER;
        }

        if (m->max_size < JOURNAL_FILE_SIZE_MIN)
                m->max_size = JOURNAL_FILE_SIZE_MIN;

        /* Always leave room for at least two files of maximum size */
        if (m->max_size*2 > m->max_use)
                m->max_use = m->max_size*2;

        /* min_size */
        if (m->min_size != (uint64_t) -1) {
                m->min_size = PAGE_ALIGN(m->min_size);

                if (m->min_size < JOURNAL_FILE_SIZE_MIN)
                        m->min_size = JOURNAL_FILE_SIZE_MIN;

                if (m->min_size > m->max_size)
                        m->max_size = m->min_size;
        } else
                m->min_size = JOURNAL_FILE_SIZE_MIN;

        /* keep_free: only filled in when unset, never adjusted otherwise */
        if (m->keep_free == (uint64_t) -1) {
                if (fs_size == 0)
                        m->keep_free = DEFAULT_KEEP_FREE;
                else {
                        m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */

                        if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
                                m->keep_free = DEFAULT_KEEP_FREE_UPPER;
                }
        }

        log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
                  format_bytes(s_use, sizeof(s_use), m->max_use),
                  format_bytes(s_max, sizeof(s_max), m->max_size),
                  format_bytes(s_min, sizeof(s_min), m->min_size),
                  format_bytes(s_free, sizeof(s_free), m->keep_free));
}
/* Reports the realtime timestamps stored in the file header: the head
 * entry's in *from and the tail entry's in *to. Either pointer may be
 * NULL, but not both.
 *
 * Returns 1 on success, or -ENOENT if a requested timestamp is zero in
 * the header. *from is written before the tail timestamp is checked,
 * so it may already be set when -ENOENT is returned for *to. */
int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
        assert(f);
        assert(from || to);

        if (from && f->header->head_entry_realtime == 0)
                return -ENOENT;
        if (from)
                *from = le64toh(f->header->head_entry_realtime);

        if (to && f->header->tail_entry_realtime == 0)
                return -ENOENT;
        if (to)
                *to = le64toh(f->header->tail_entry_realtime);

        return 1;
}
/* Reports the monotonic timestamps of the first (*from) and last (*to)
 * entry that reference the "_BOOT_ID=<boot_id>" data object. Either
 * pointer may be NULL, but not both.
 *
 * Returns 1 on success, 0 if the data object has no entries, and
 * otherwise passes through the non-positive results of the lookup
 * helpers (journal_file_find_data_object(),
 * journal_file_move_to_object(), generic_array_get_plus_one()). */
int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
        char match[9+32+1] = "_BOOT_ID=";
        uint64_t data_offset;
        Object *obj;
        int k;

        assert(f);
        assert(from || to);

        /* Append the hex boot ID right after the 9 character prefix */
        sd_id128_to_string(boot_id, match + 9);

        k = journal_file_find_data_object(f, match, strlen(match), &obj, &data_offset);
        if (k <= 0)
                return k;

        if (le64toh(obj->data.n_entries) == 0)
                return 0;

        if (from) {
                uint64_t first_entry = le64toh(obj->data.entry_offset);

                k = journal_file_move_to_object(f, OBJECT_ENTRY, first_entry, &obj);
                if (k < 0)
                        return k;

                *from = le64toh(obj->entry.monotonic);
        }

        if (to) {
                uint64_t first_entry, entry_array, last_index;

                /* obj may point at an entry by now, so load the data
                 * object again from its remembered offset */
                k = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &obj);
                if (k < 0)
                        return k;

                first_entry = le64toh(obj->data.entry_offset);
                entry_array = le64toh(obj->data.entry_array_offset);
                last_index = le64toh(obj->data.n_entries) - 1;

                k = generic_array_get_plus_one(f,
                                               first_entry,
                                               entry_array,
                                               last_index,
                                               &obj, NULL);
                if (k <= 0)
                        return k;

                *to = le64toh(obj->entry.monotonic);
        }

        return 1;
}
/* Tells whether the file should be rotated. Returns true, after
 * logging the reason at debug level, if the header is smaller than the
 * current Header structure or if the data or field hash table is more
 * than 75% full; returns false otherwise. */
bool journal_file_rotate_suggested(JournalFile *f) {
        assert(f);

        /* A header shorter than ours lacks newer fields, i.e. newer
         * features: suggest rotating to a file that has them */
        if (le64toh(f->header->header_size) < sizeof(Header)) {
                log_debug("%s uses an outdated header, suggesting rotation.", f->path);
                return true;
        }

        /* Suggest a rotation once a hash table is filled beyond 75%
         * (the threshold Java's hash table implementation uses). The
         * item counters needed for this only exist in newer header
         * versions, hence check for their presence first. */

        if (JOURNAL_HEADER_CONTAINS(f->header, n_data)) {
                uint64_t used, slots;

                used = le64toh(f->header->n_data);
                slots = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);

                if (used * 4ULL > slots * 3ULL) {
                        log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
                                  f->path,
                                  100.0 * (double) used / ((double) slots),
                                  (unsigned long long) used,
                                  (unsigned long long) slots,
                                  (unsigned long long) (f->last_stat.st_size),
                                  (unsigned long long) (f->last_stat.st_size / used));
                        return true;
                }
        }

        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields)) {
                uint64_t used, slots;

                used = le64toh(f->header->n_fields);
                slots = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);

                if (used * 4ULL > slots * 3ULL) {
                        log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
                                  f->path,
                                  100.0 * (double) used / ((double) slots),
                                  (unsigned long long) used,
                                  (unsigned long long) slots);
                        return true;
                }
        }

        return false;
}