/***
This file is part of systemd.
Copyright 2014 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/sysmacros.h>
#include <unistd.h>
#ifdef HAVE_LINUX_BTRFS_H
#endif
#include "alloc-util.h"
#include "btrfs-ctree.h"
#include "btrfs-util.h"
#include "copy.h"
#include "fd-util.h"
#include "fileio.h"
#include "io-util.h"
#include "macro.h"
#include "missing.h"
#include "path-util.h"
#include "selinux-util.h"
#include "smack-util.h"
#include "sparse-endian.h"
#include "stat-util.h"
#include "string-util.h"
#include "time-util.h"
#include "util.h"
/* WARNING: Be careful with file system ioctls! When we get an fd, we
* need to make sure it either refers to only a regular file or
* directory, or that it is located on btrfs, before invoking any
* btrfs ioctls. The ioctl numbers are reused by some device drivers
* (such as DRM), and hence might have bad effects when invoked on
* device nodes (that reference drivers) rather than fds to normal
* files or directories. */
if (!filename_is_valid(name))
return -EINVAL;
return -E2BIG;
return 0;
}
int fd;
if (!parent)
return -ENOMEM;
if (fd < 0)
return -errno;
return fd;
}
const char *fn;
int r;
r = validate_subvolume_name(fn);
if (r < 0)
return r;
return 0;
}
return -errno;
}
/* On btrfs subvolumes always have the inode 256 */
return -errno;
return 0;
return btrfs_is_filesystem(fd);
}
if (fd < 0)
return -errno;
return btrfs_is_subvol_fd(fd);
}
const char *subvolume;
int r;
if (r < 0)
return r;
if (fd < 0)
return fd;
return -errno;
return 0;
}
int r;
if (r < 0)
return r;
r = btrfs_subvol_make(path);
if (r < 0)
return r;
return mac_smack_fix(path, false, false);
}
return -errno;
return -EINVAL;
return -errno;
if (b)
else
return 0;
return -errno;
return 0;
}
if (fd < 0)
return -errno;
return btrfs_subvol_set_read_only_fd(fd, b);
}
return -errno;
return -EINVAL;
return -errno;
return !!(flags & BTRFS_SUBVOL_RDONLY);
}
int r;
/* Make sure we invoke the ioctl on a regular file, so that no
* device driver accidentally gets it. */
return -errno;
return -EINVAL;
if (r < 0)
return -errno;
return 0;
}
.src_offset = in_offset,
.src_length = sz,
};
int r;
return -errno;
return -EINVAL;
if (r < 0)
return -errno;
return 0;
}
int r;
r = btrfs_is_filesystem(fd);
if (r < 0)
return r;
if (!r)
return -ENOTTY;
return -errno;
/* We won't do this for btrfs RAID */
return 0;
};
continue;
return -errno;
}
return -errno;
return -ENODEV;
return -ENODEV;
return 1;
}
return -ENODEV;
}
if (fd < 0)
return -errno;
}
};
int r;
r = btrfs_is_filesystem(fd);
if (r < 0)
return r;
if (!r)
return -ENOTTY;
return -errno;
return 0;
}
if (subvol_fd < 0)
return -errno;
}
/* the objectid, type, offset together make up the btrfs key,
* which is considered a single 136bit integer when
* comparing. This call increases the counter by one, dealing
* with the overflow between the individual components */
return true;
}
return true;
}
return true;
}
return 0;
}
/* Helper operating on a search-args structure and one search result header.
*
* The only statement visible in this body is the assertion that `h` is
* non-NULL; `args` is not touched by anything shown here.
*
* NOTE(review): presumably this is meant to position the search key in
* `args` at the item described by `h` -- confirm against the full source,
* the assignments are not part of this excerpt. */
static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) {
assert(h);
}
/* Compare min and max */
return -1;
return 1;
return -1;
return 1;
return -1;
return 1;
return 0;
}
for ((i) = 0, \
(i)++, \
(sh) = (const struct btrfs_ioctl_search_header*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header) + (sh)->len))
/* Tree of tree roots */
/* Look precisely for the subvolume items */
.key.min_offset = 0,
/* No restrictions on the other components */
.key.min_transid = 0,
};
bool found = false;
int r;
if (subvol_id == 0) {
if (r < 0)
return r;
} else {
r = btrfs_is_filesystem(fd);
if (r < 0)
return r;
if (!r)
return -ENOTTY;
}
while (btrfs_ioctl_search_args_compare(&args) <= 0) {
unsigned i;
return -errno;
break;
/* Make sure we start the next search at least from this entry */
continue;
continue;
/* Older versions of the struct lacked the otime setting */
continue;
found = true;
goto finish;
}
/* Increase search key by one, to read the next item, if we can. */
if (!btrfs_ioctl_search_args_inc(&args))
break;
}
if (!found)
return -ENODATA;
return 0;
}
/* Tree of quota items */
/* The object ID is always 0 */
.key.min_objectid = 0,
.key.max_objectid = 0,
/* Look precisely for the quota items */
/* No restrictions on the other components */
.key.min_transid = 0,
};
int r;
if (qgroupid == 0) {
if (r < 0)
return r;
} else {
r = btrfs_is_filesystem(fd);
if (r < 0)
return r;
if (!r)
return -ENOTTY;
}
while (btrfs_ioctl_search_args_compare(&args) <= 0) {
unsigned i;
break;
return -errno;
}
break;
/* Make sure we start the next search at least from this entry */
continue;
continue;
found_info = true;
else
else
found_limit = true;
}
if (found_info && found_limit)
goto finish;
}
/* Increase search key by one, to read the next item, if we can. */
if (!btrfs_ioctl_search_args_inc(&args))
break;
}
if (!found_limit && !found_info)
return -ENODATA;
if (!found_info) {
}
if (!found_limit) {
}
return 0;
}
if (fd < 0)
return -errno;
}
int r, n, i;
/* This finds the "subtree" qgroup for a specific
* subvolume. This only works for subvolumes that have been
* prepared with btrfs_subvol_auto_qgroup_fd() with
* insert_intermediary_qgroup=true (or equivalent). For others
* it will return the leaf qgroup instead. The two cases may
* be distinguished via the return value, which is 1 in case
* an appropriate "subtree" qgroup was found, and 0
* otherwise. */
if (subvol_id == 0) {
if (r < 0)
return r;
}
if (r < 0)
return r;
if (level != 0) /* Input must be a leaf qgroup */
return -EINVAL;
if (n < 0)
return n;
for (i = 0; i < n; i++) {
if (r < 0)
return r;
continue;
lowest_qgroupid = qgroups[i];
}
}
/* No suitable higher-level qgroup found, let's return
* the leaf qgroup instead, and indicate that with the
* return value. */
return 0;
}
*ret = lowest_qgroupid;
return 1;
}
int r;
/* This determines the quota data of the qgroup with the
* lowest level, that shares the id part with the specified
* subvolume. This is useful for determining the quota data
* for entire subvolume subtrees, as long as the subtrees have
* been set up with btrfs_subvol_auto_qgroup_fd() or in a
* compatible way */
if (r < 0)
return r;
}
if (fd < 0)
return -errno;
}
return -errno;
return -EINVAL;
return -errno;
return 0;
}
/* Path-based front end for btrfs_defrag_fd().
*
* Returns -errno when the file descriptor is negative, otherwise the
* result of btrfs_defrag_fd(fd) is passed through unchanged.
*
* NOTE(review): the declaration of `fd` and the call that derives it from
* `p` are not visible in this excerpt -- confirm how it is opened and
* closed. */
int btrfs_defrag(const char *p) {
if (fd < 0)
return -errno;
return btrfs_defrag_fd(fd);
}
};
int r;
r = btrfs_is_filesystem(fd);
if (r < 0)
return r;
if (!r)
return -ENOTTY;
return -errno;
return 0;
}
if (fd < 0)
return -errno;
return btrfs_quota_enable_fd(fd, b);
}
};
unsigned c;
int r;
if (qgroupid == 0) {
if (r < 0)
return r;
} else {
r = btrfs_is_filesystem(fd);
if (r < 0)
return r;
if (!r)
return -ENOTTY;
}
for (c = 0;; c++) {
(void) btrfs_quota_scan_wait(fd);
continue;
}
return -errno;
}
break;
}
return 0;
}
if (fd < 0)
return -errno;
}
int r;
if (r < 0)
return r;
}
/* Path-based entry point taking a subvolume id and a referenced-bytes limit.
*
* Returns -errno when the file descriptor is negative.
*
* NOTE(review): neither the declaration/open of `fd` nor the success-path
* return statement is visible in this excerpt -- confirm which fd-based
* function the call is forwarded to. */
int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) {
if (fd < 0)
return -errno;
}
int r;
/* In contrast to btrfs quota ioctls ftruncate() cannot make sense of "infinity" or file sizes > 2^31 */
if (!FILE_SIZE_VALID(new_size))
return -EINVAL;
/* btrfs cannot handle file systems < 16M, hence use this as minimum */
if (r < 0)
return r;
if (r == 0)
return -ENODEV;
return -ENOMEM;
r = read_one_line_file(p, &backing);
if (r == -ENOENT)
return -ENODEV;
if (r < 0)
return r;
return -ENODEV;
if (backing_fd < 0)
return -errno;
return -errno;
return -ENODEV;
return 0;
return -EINVAL;
return -ENOMEM;
if (loop_fd < 0)
return -errno;
return -EINVAL;
/* Decrease size: first decrease btrfs size, then shorten loopback */
return -errno;
}
return -errno;
return -errno;
/* Increase size: first enlarge loopback, then increase btrfs size */
return -errno;
}
/* Make sure the free disk space is correctly updated for both file systems */
(void) fsync(backing_fd);
return 1;
}
if (fd < 0)
return -errno;
}
return -EINVAL;
return -EINVAL;
return 0;
}
if (level)
if (id)
return 0;
}
.create = b,
};
unsigned c;
int r;
r = btrfs_is_filesystem(fd);
if (r < 0)
return r;
if (r == 0)
return -ENOTTY;
for (c = 0;; c++) {
/* If quota is not enabled, we get EINVAL. Turn this into a recognizable error */
return -ENOPROTOOPT;
(void) btrfs_quota_scan_wait(fd);
continue;
}
return -errno;
}
break;
}
return 0;
}
}
}
int i, n, r;
/* Destroys the specified qgroup, but unassigns it from all
* its parents first. Also, it recursively destroys all
* qgroups it is assigned to that have the same id part of the
* qgroupid as the specified group. */
if (r < 0)
return r;
if (n < 0)
return n;
for (i = 0; i < n; i++) {
if (r < 0)
return r;
if (r < 0)
return r;
continue;
/* The parent qgroupid shares the same id part with
* us? If so, destroy it too. */
}
}
return -errno;
return 0;
}
return -errno;
return 0;
}
return -errno;
}
.assign = b,
};
unsigned c;
int r;
r = btrfs_is_filesystem(fd);
if (r < 0)
return r;
if (r == 0)
return -ENOTTY;
for (c = 0;; c++) {
if (r < 0) {
(void) btrfs_quota_scan_wait(fd);
continue;
}
return -errno;
}
if (r == 0)
return 0;
/* If the return value is > 0, we need to request a rescan */
(void) btrfs_quota_scan_start(fd);
return 1;
}
}
}
}
/* Removes the subvolume named `subvolume`, including child subvolumes.
*
* Strategy, as laid out by the comments in the body:
*   1. Try to remove the subvolume directly; this succeeds if it is
*      already empty, in which case its qgroup is destroyed on a
*      best-effort basis and 0 is returned.
*   2. Otherwise walk the search results, removing child subvolumes
*      first. The read-only flag of the subvolume is cleared at most
*      once before children are handled.
*   3. Retry the removal of the subvolume itself.
*
* Returns 0 on success, a negative errno-style value on failure
* (-EINVAL, -ENOMEM, -errno, or an error propagated from a helper).
*
* NOTE(review): several statements of this function (the declarations of
* `args` and `subvol_fd`, the ioctl calls, the inner result loop) are not
* visible in this excerpt -- confirm the details against the full source. */
static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) {
.key.min_transid = 0,
};
bool made_writable = false;
int r;
return -errno;
return -EINVAL;
if (subvol_fd < 0)
return -errno;
if (subvol_id == 0) {
if (r < 0)
return r;
}
/* First, try to remove the subvolume. If it happens to be
* already empty, this will just work. */
(void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */
return 0;
}
return -errno;
/* OK, the subvolume is not empty, let's look for child
* subvolumes, and remove them, first */
while (btrfs_ioctl_search_args_compare(&args) <= 0) {
unsigned i;
return -errno;
break;
_cleanup_free_ char *p = NULL;
continue;
continue;
if (!p)
return -ENOMEM;
return -errno;
/* Clear the read-only flag only once per invocation;
* made_writable remembers that this has already been done. */
if (!made_writable) {
r = btrfs_subvol_set_read_only_fd(subvol_fd, false);
if (r < 0)
return r;
made_writable = true;
}
/* Subvolume is in the top-level
* directory of the subvolume. */
else {
/* Subvolume is somewhere further down,
* hence we need to open the
* containing directory first */
if (child_fd < 0)
return -errno;
}
if (r < 0)
return r;
}
/* Increase search key by one, to read the next item, if we can. */
if (!btrfs_ioctl_search_args_inc(&args))
break;
}
/* OK, the child subvolumes should all be gone now, let's try
* again to remove the subvolume */
return -errno;
return 0;
}
const char *subvolume;
int r;
if (r < 0)
return r;
if (fd < 0)
return fd;
}
}
/* Tree of quota items */
/* The object ID is always 0 */
.key.min_objectid = 0,
.key.max_objectid = 0,
/* Look precisely for the quota items */
/* For our qgroup */
/* No restrictions on the other components */
.key.min_transid = 0,
};
int r;
r = btrfs_is_filesystem(fd);
if (r < 0)
return r;
if (!r)
return -ENOTTY;
while (btrfs_ioctl_search_args_compare(&args) <= 0) {
unsigned i;
break;
return -errno;
}
break;
unsigned c;
/* Make sure we start the next search at least from this entry */
continue;
continue;
continue;
/* We found the entry, now copy things over. */
qargs = (struct btrfs_ioctl_qgroup_limit_args) {
.qgroupid = new_qgroupid,
};
for (c = 0;; c++) {
(void) btrfs_quota_scan_wait(fd);
continue;
}
return -errno;
}
break;
}
return 1;
}
/* Increase search key by one, to read the next item, if we can. */
if (!btrfs_ioctl_search_args_inc(&args))
break;
}
return 0;
}
/* Copies a reduced form of quota information from the old to
* the new subvolume. */
if (n_old_qgroups <= 0) /* Nothing to copy */
return n_old_qgroups;
if (r == -ENXIO)
/* We have no parent, hence nothing to copy. */
n_old_parent_qgroups = 0;
else if (r < 0)
return r;
else {
if (n_old_parent_qgroups < 0)
return n_old_parent_qgroups;
}
for (i = 0; i < n_old_qgroups; i++) {
int j;
if (r < 0)
return r;
if (id == old_subvol_id) {
/* The old subvolume was member of a qgroup
* that had the same id, but a different level
* as itself. Let's set up something similar
* in the destination. */
insert_intermediary_qgroup = true;
break;
}
for (j = 0; j < n_old_parent_qgroups; j++)
if (old_parent_qgroups[j] == old_qgroups[i]) {
/* The old subvolume shared a common
* parent qgroup with its parent
* subvolume. Let's set up something
* similar in the destination. */
copy_from_parent = true;
}
}
if (!insert_intermediary_qgroup && !copy_from_parent)
return 0;
}
bool changed;
int r;
/* First copy the leaf limits */
if (r < 0)
return r;
changed = r > 0;
/* Then, try to copy the subtree limits, if there are any. */
if (r < 0)
return r;
if (r == 0)
return changed;
if (r < 0)
return r;
if (r == 0)
return changed;
if (r != 0)
return r;
return changed;
}
/* Snapshots the subvolume behind `old_fd` as `subvolume` below `new_fd`,
* optionally descending into child subvolumes.
*
* Behavior visible in this body:
*   - If neither BTRFS_SNAPSHOT_RECURSIVE nor BTRFS_SNAPSHOT_QUOTA is set
*     in `flags`, the function returns 0 without looking at children.
*   - For each child subvolume found by the search loop, the function
*     calls itself recursively with BTRFS_SNAPSHOT_FALLBACK_COPY masked
*     out of `flags`.
*   - With BTRFS_SNAPSHOT_READ_ONLY set, the new snapshot is made
*     writable temporarily so the child snapshot can be put in place,
*     and the read-only flag is restored afterwards (also on the
*     failure path that returns `k`).
*
* Returns 0 on success, a negative errno-style value on failure.
*
* NOTE(review): the ioctl invocations, the declarations of `args`, `p`,
* `c`, `np` and the child fds, and the quota-copy calls are not visible in
* this excerpt -- confirm against the full source. */
static int subvol_snapshot_children(int old_fd, int new_fd, const char *subvolume, uint64_t old_subvol_id, BtrfsSnapshotFlags flags) {
.key.min_transid = 0,
};
};
int r;
return -errno;
if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) &&
!(flags & BTRFS_SNAPSHOT_QUOTA))
return 0;
if (old_subvol_id == 0) {
if (r < 0)
return r;
}
if (r < 0)
return r;
if (flags & BTRFS_SNAPSHOT_QUOTA)
if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) {
if (flags & BTRFS_SNAPSHOT_QUOTA)
return 0;
}
while (btrfs_ioctl_search_args_compare(&args) <= 0) {
unsigned i;
return -errno;
break;
continue;
/* Avoid finding the source subvolume a second
* time */
continue;
/* Avoid running into loops if the new
* subvolume is below the old one. */
continue;
if (!p)
return -ENOMEM;
return -errno;
/* The kernel returns an empty name if the
* subvolume is in the top-level directory,
* and otherwise appends a slash, so that we
* can just concatenate easily here, without
* adding a slash. */
if (!c)
return -ENOMEM;
if (old_child_fd < 0)
return -errno;
if (!np)
return -ENOMEM;
if (new_child_fd < 0)
return -errno;
if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
/* If the snapshot is read-only we
* need to mark it writable
* temporarily, to put the subsnapshot
* into place. */
if (subvolume_fd < 0) {
if (subvolume_fd < 0)
return -errno;
}
r = btrfs_subvol_set_read_only_fd(subvolume_fd, false);
if (r < 0)
return r;
}
/* When btrfs clones the subvolumes, child
* subvolumes appear as empty directories. Remove
* them, so that we can create a new snapshot
* in their place */
int k = -errno;
if (flags & BTRFS_SNAPSHOT_READ_ONLY)
(void) btrfs_subvol_set_read_only_fd(subvolume_fd, true);
return k;
}
/* Recurse into the child; the fallback-copy flag is masked out
* for the nested call. */
r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh->objectid, flags & ~BTRFS_SNAPSHOT_FALLBACK_COPY);
/* Restore the readonly flag */
if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
int k;
k = btrfs_subvol_set_read_only_fd(subvolume_fd, true);
/* A failure to restore the flag is only reported when the
* recursion itself succeeded; otherwise the recursion's
* error in r is returned below. */
if (r >= 0 && k < 0)
return k;
}
if (r < 0)
return r;
}
/* Increase search key by one, to read the next item, if we can. */
if (!btrfs_ioctl_search_args_inc(&args))
break;
}
if (flags & BTRFS_SNAPSHOT_QUOTA)
return 0;
}
const char *subvolume;
int r;
r = btrfs_is_subvol_fd(old_fd);
if (r < 0)
return r;
if (r == 0) {
if (!(flags & BTRFS_SNAPSHOT_FALLBACK_COPY))
return -EISDIR;
r = btrfs_subvol_make(new_path);
if (r < 0)
return r;
if (r < 0) {
return r;
}
if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
r = btrfs_subvol_set_read_only(new_path, true);
if (r < 0) {
return r;
}
}
return 0;
}
if (r < 0)
return r;
if (new_fd < 0)
return new_fd;
}
if (old_fd < 0)
return -errno;
}
/* Tree of quota items */
/* Look precisely for the quota relation items */
/* No restrictions on the other components */
.key.min_offset = 0,
.key.min_transid = 0,
};
int r;
if (qgroupid == 0) {
if (r < 0)
return r;
} else {
r = btrfs_is_filesystem(fd);
if (r < 0)
return r;
if (!r)
return -ENOTTY;
}
while (btrfs_ioctl_search_args_compare(&args) <= 0) {
unsigned i;
break;
return -errno;
}
break;
/* Make sure we start the next search at least from this entry */
continue;
continue;
continue;
return -ENOMEM;
}
/* Increase search key by one, to read the next item, if we can. */
if (!btrfs_ioctl_search_args_inc(&args))
break;
}
if (n_items <= 0) {
return 0;
}
return (int) n_items;
}
bool changed = false;
int n = 0, r;
/*
* Sets up the specified subvolume's qgroup automatically in
* one of two ways:
*
* If insert_intermediary_qgroup is false, the subvolume's
* leaf qgroup will be assigned to the same parent qgroups as
* the subvolume's parent subvolume.
*
* If insert_intermediary_qgroup is true a new intermediary
* higher-level qgroup is created, with a higher level number,
* but reusing the id of the subvolume. The level number is
* picked as one smaller than the lowest level qgroup the
* parent subvolume is a member of. If the parent subvolume's
* leaf qgroup is assigned to no higher-level qgroup a new
* qgroup of level 255 is created instead. Either way, the new
* qgroup is then assigned to the parent's higher-level
* qgroup, and the subvolume itself is assigned to it.
*
* If the subvolume is already assigned to a higher level
* qgroup, no operation is executed.
*
* Effectively this means: regardless if
* insert_intermediary_qgroup is true or not, after this
* function is invoked the subvolume will be accounted within
* the same qgroups as the parent. However, if it is true, it
* will also get its own higher-level qgroup, which may in
* turn be used by subvolumes created beneath this subvolume
* later on.
*
* This hence defines a simple default qgroup setup for
* subvolumes, as long as this function is invoked on each
* created subvolume: each subvolume is always accounted
* together with its immediate parents. Optionally, if
* insert_intermediary_qgroup is true, it will also get a
* qgroup that then includes all its own child subvolumes.
*/
if (subvol_id == 0) {
r = btrfs_is_subvol_fd(fd);
if (r < 0)
return r;
if (!r)
return -ENOTTY;
if (r < 0)
return r;
}
if (n < 0)
return n;
if (n > 0) /* already parent qgroups set up, let's bail */
return 0;
if (r == -ENXIO)
/* No parent, hence no qgroup memberships */
n = 0;
else if (r < 0)
return r;
else {
if (n < 0)
return n;
}
if (insert_intermediary_qgroup) {
bool created = false;
int i;
/* Determine the lowest qgroup that the parent
* subvolume is assigned to. */
for (i = 0; i < n; i++) {
if (r < 0)
return r;
}
return -EBUSY;
if (r < 0)
return r;
/* Create the new intermediary group, unless it already exists */
if (r < 0 && r != -EEXIST)
return r;
if (r >= 0)
for (i = 0; i < n; i++) {
if (r < 0 && r != -EEXIST) {
if (created)
return r;
}
if (r >= 0)
changed = true;
}
if (r < 0 && r != -EEXIST) {
if (created)
return r;
}
if (r >= 0)
changed = true;
} else {
int i;
/* Assign our subvolume to all the same qgroups as the parent */
for (i = 0; i < n; i++) {
if (r < 0 && r != -EEXIST)
return r;
if (r >= 0)
changed = true;
}
}
return changed;
}
/* Path-based entry point for the automatic qgroup setup of a subvolume.
*
* Returns -errno when the file descriptor is negative.
*
* NOTE(review): the declaration/open of `fd` and the success-path return
* are not visible in this excerpt -- presumably the call is forwarded to an
* fd-based variant with `subvol_id` and `create_intermediary_qgroup`;
* confirm against the full source. */
int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) {
if (fd < 0)
return -errno;
}
/* Tree of tree roots */
/* Look precisely for the subvolume items */
/* No restrictions on the other components */
.key.min_offset = 0,
.key.min_transid = 0,
};
int r;
if (subvol_id == 0) {
if (r < 0)
return r;
} else {
r = btrfs_is_filesystem(fd);
if (r < 0)
return r;
if (!r)
return -ENOTTY;
}
while (btrfs_ioctl_search_args_compare(&args) <= 0) {
unsigned i;
return negative_errno();
break;
continue;
continue;
return 0;
}
}
return -ENXIO;
}