mount-setup.c revision 1411b09467c90ae358656d14165311090a2e175e
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2010 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <errno.h>
#include <ftw.h>
#include <stdlib.h>
#include <unistd.h>
#include "alloc-util.h"
#include "bus-util.h"
#include "cgroup-util.h"
#include "dev-setup.h"
#include "efivars.h"
#include "label.h"
#include "log.h"
#include "macro.h"
#include "missing.h"
#include "mkdir.h"
#include "mount-setup.h"
#include "mount-util.h"
#include "path-util.h"
#include "set.h"
#include "smack-util.h"
#include "strv.h"
#include "user-util.h"
#include "util.h"
#include "virt.h"
/* Bit flags stored in MountPoint.mode. */
typedef enum MountMode {
        MNT_NONE         = 0,
        /* When set, failures for the entry are logged at LOG_ERR
         * instead of LOG_DEBUG. */
        MNT_FATAL        = 1 << 0,
        /* Used by the table entries and by the cgroup controller
         * mounts; presumably marks entries that are also mounted when
         * running in a container -- TODO confirm against mount_one(). */
        MNT_IN_CONTAINER = 1 << 1,
} MountMode;

/* Description of one mount point. The member order matters: the mount
 * table is initialized positionally, ending in "condition_fn, mode". */
typedef struct MountPoint {
        const char *what;               /* mount source */
        const char *where;              /* target directory */
        const char *type;               /* file system type */
        const char *options;            /* file system specific option string, may be NULL */
        unsigned long flags;            /* MS_* mount flags */
        bool (*condition_fn)(void);     /* optional; the entry is skipped if this returns false */
        MountMode mode;                 /* combination of MountMode flags */
} MountPoint;
/* The first three entries we might need before SELinux is up. The
* fourth (securityfs) is needed by IMA to load a custom policy. The
* other ones we can delay until SELinux and IMA are loaded. When
* SMACK is enabled we need smackfs, too, so it's a fifth one. */
/* Entry count that mount_setup_early() hands to mount_points_setup();
 * one larger when SMACK support is compiled in. */
#if defined(HAVE_SMACK)
#  define N_EARLY_MOUNT 5
#else
#  define N_EARLY_MOUNT 4
#endif
/* Static table of the mount points this file knows about;
 * mount_point_is_api() walks all ELEMENTSOF(mount_table) entries.
 *
 * Entries are initialized positionally in MountPoint member order and
 * end in "condition_fn, mode".
 *
 * NOTE(review): most initializer rows are not visible here. Only the
 * tail of one row, the head of the /sys/fs/cgroup/systemd row and the
 * tail of an EFI-conditional row remain, and both HAVE_SMACK sections
 * are empty. Restore the full rows before building. */
static const MountPoint mount_table[] = {
#ifdef HAVE_SMACK
#endif
NULL, MNT_IN_CONTAINER },
#ifdef HAVE_SMACK
#endif
/* The named "systemd" cgroup hierarchy. */
{ "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
#ifdef ENABLE_EFI
/* Row tail: only considered when is_efi_boot() returns true. */
is_efi_boot, MNT_NONE },
#endif
};
/* These are API file systems that might be mounted by other software,
* we just list them here so that we know that we should ignore them */
/* NOTE(review): the string literals that make up this list, and the
 * terminating ';', are not visible here; only the section comments
 * remain. Presumably a NUL-separated string list -- confirm. */
static const char ignore_paths[] =
/* SELinux file systems */
/* Container bind mounts */
/* Returns true if 'path' is one of the API mount points handled by
 * this file, false otherwise. */
bool mount_point_is_api(const char *path) {
        unsigned i;

        /* Checks if this mount point is considered "API", and hence
         * should be ignored */

        for (i = 0; i < ELEMENTSOF(mount_table); i++)
                if (path_equal(path, mount_table[i].where))
                        return true;

        /* The cgroup controller hierarchies are mounted dynamically
         * into the /sys/fs/cgroup tmpfs and hence are not listed in
         * mount_table; treat everything below it as API, too. */
        return !!path_startswith(path, "/sys/fs/cgroup/");
}
bool mount_point_ignore(const char *path) {
const char *i;
if (path_equal(path, i))
return true;
return false;
}
/* NOTE(review): this looks like the body of mount_one(p, relabel), which
 * is called as mount_one(&p, true) further down, but the signature line
 * and several statements are not visible here.
 *
 * Visible return values: 0 if the entry's condition_fn says no, 0 if
 * r > 0 after what is presumably the mount point check, 1 at the end of
 * the function once the mount has been attempted. */
int r;
assert(p);
/* Entries with a condition callback are skipped when it returns false. */
if (p->condition_fn && !p->condition_fn())
return 0;
/* Relabel first, just in case */
if (relabel)
/* NOTE(review): the relabel call and the assignment to r are missing
 * here; -ENOENT is tolerated below, other errors are logged at LOG_ERR
 * for MNT_FATAL entries and at LOG_DEBUG otherwise. */
if (r < 0 && r != -ENOENT) {
log_full_errno((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, r, "Failed to determine whether %s is a mount point: %m", p->where);
}
/* Presumably "already a mount point": nothing to do. */
if (r > 0)
return 0;
/* Skip securityfs in a container */
/* NOTE(review): the condition guarding this return is not visible. */
return 0;
/* The access mode here doesn't really matter too much, since
* the mounted file system will take precedence anyway. */
/* NOTE(review): both branches of this if/else are missing, as are the
 * tail of the log_debug() call and the head of the mount call below. */
if (relabel)
else
log_debug("Mounting %s to %s of type %s with options %s.",
p->what,
p->where,
p->type,
p->where,
p->type,
p->flags,
p->options) < 0) {
/* errno is reported here, so the failing call sets errno. */
log_full_errno((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, errno, "Failed to mount %s at %s: %m", p->type, p->where);
}
/* Relabel again, since we now mounted something fresh here */
/* NOTE(review): the relabel call is missing, so as written this 'if'
 * guards the return statement. */
if (relabel)
return 1;
}
static int mount_points_setup(unsigned n, bool loaded_policy) {
unsigned i;
int r = 0;
for (i = 0; i < n; i ++) {
int j;
if (j != 0 && r >= 0)
r = j;
}
return r;
}
/* Early-boot entry point: asks mount_points_setup() to process
 * N_EARLY_MOUNT entries with loaded_policy set to false, and returns
 * its result unchanged. */
int mount_setup_early(void) {
        int r;

        /* Only the minimal set (/proc and friends) is mounted at this
         * point, enough for the most basic stuff such as SELinux. */
        r = mount_points_setup(N_EARLY_MOUNT, false);

        return r;
}
/* Mounts the cgroup controller hierarchies and finally remounts the
 * /sys/fs/cgroup tmpfs read-only.
 *
 * join_controllers may be NULL; otherwise it is walked as a
 * NULL-terminated array of string vectors, each searched with
 * strv_find() for the current controller. The vectors are modified in
 * place (entries may be freed and removed).
 *
 * Returns 0 right away if cg_is_legacy_wanted() is false, 0 at the end,
 * or the result of log_oom(), log_error_errno() or mount_one() when one
 * of those paths is taken.
 *
 * NOTE(review): 'controllers', 'controller', 'options' and 'where' are
 * used below but their declarations and assignments are not visible,
 * and the closing brace of the for (;;) loop is missing. */
int mount_cgroup_controllers(char ***join_controllers) {
int r;
if (!cg_is_legacy_wanted())
return 0;
/* Mount all available cgroup controllers that are built into the kernel. */
if (!controllers)
return log_oom();
/* NOTE(review): the call that sets r here is missing. */
if (r < 0)
return log_error_errno(r, "Failed to enumerate cgroup controllers: %m");
for (;;) {
/* Per-iteration mount description; .where and .options are not set
 * in the visible initializer. */
MountPoint p = {
.what = "cgroup",
.type = "cgroup",
.mode = MNT_IN_CONTAINER,
};
char ***k = NULL;
/* No controller left: done. */
if (!controller)
break;
/* Look for a join group that lists this controller; k is left at the
 * NULL terminator if there is none. */
if (join_controllers)
for (k = join_controllers; *k; k++)
if (strv_find(*k, controller))
break;
if (k && *k) {
char **i, **j;
/* Compact the group in place: i reads, j writes. Every other member
 * is taken out of the 'controllers' set; members not found in the
 * set are freed and dropped from the vector. */
for (i = *k, j = *k; *i; i++) {
if (!streq(*i, controller)) {
_cleanup_free_ char *t;
t = set_remove(controllers, *i);
if (!t) {
free(*i);
continue;
}
}
*(j++) = *i;
}
*j = NULL;
if (!options)
return log_oom();
} else {
controller = NULL;
}
if (!where)
return log_oom();
r = mount_one(&p, true);
if (r < 0)
return r;
/* Only when mount_one() returned > 0 and the controller belongs to a
 * join group. */
if (r > 0 && k && *k) {
char **i;
for (i = *k; *i; i++) {
_cleanup_free_ char *t = NULL;
/* NOTE(review): the assignment to t is missing, so as written this
 * check is always true. */
if (!t)
return log_oom();
if (r >= 0) {
#ifdef SMACK_RUN_LABEL
/* NOTE(review): the assignment to src is missing; src is read
 * uninitialized as written. */
_cleanup_free_ char *src;
if (!src)
return log_oom();
r = mac_smack_copy(t, src);
/* -EOPNOTSUPP is tolerated; the statement this 'if' guards is not
 * visible. */
if (r < 0 && r != -EOPNOTSUPP)
#endif
}
}
}
/* Now that we mounted everything, let's make the tmpfs the
* cgroup file systems are mounted into read-only. */
/* The result is deliberately ignored (void cast). */
(void) mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
return 0;
}
#if defined(HAVE_SELINUX) || defined(HAVE_SMACK)
/* Tree-walk callback, presumably passed to nftw() from <ftw.h> -- the
 * call site is not visible here. Returns FTW_CONTINUE or
 * FTW_SKIP_SUBTREE.
 *
 * NOTE(review): the rest of the parameter list, the opening brace and
 * the conditions guarding the first two returns are missing. */
static int nftw_cb(
const char *fpath,
int tflag,
/* No need to label /dev twice in a row... */
return FTW_CONTINUE;
/* NOTE(review): the next line is the tail of a comment whose opening
 * is missing; as written it is not inside a comment. */
* dynamically relabel its contents at boot... */
return FTW_SKIP_SUBTREE;
return FTW_CONTINUE;
};
#endif
/* Main mount setup entry point. Returns r if it is negative after the
 * first step, 0 otherwise.
 *
 * NOTE(review): several calls are not visible in this function: the one
 * that sets r, the relabelling, the symlink creation and the
 * mount-propagation change described by the comments below. */
int mount_setup(bool loaded_policy) {
int r = 0;
/* NOTE(review): r is never assigned before this check as written. */
if (r < 0)
return r;
#if defined(HAVE_SELINUX) || defined(HAVE_SMACK)
/* Nodes in devtmpfs and /run need to be manually updated for
* the appropriate labels, after mounting. The other virtual
* API file systems like /sys and /proc do not need that, they
* use the same label for all their files. */
if (loaded_policy) {
/* Buffer for the formatted duration reported by the log message.
 * NOTE(review): the log_info() call below is not terminated. */
char timespan[FORMAT_TIMESPAN_MAX];
log_info("Relabelled /dev and /run in %s.",
}
#endif
/* Create a few default symlinks, which are normally created
* by udevd, but some scripts might need them before we start
* udevd. */
/* Mark the root directory as shared in regards to mount
* propagation. The kernel defaults to "private", but we think
* it makes more sense to have a default of "shared" so that
* nspawn and the container tools work out of the box. If
* specific setups need other settings they can reset the
* propagation mode to private if needed. */
/* NOTE(review): the statement this 'if' was meant to guard is missing;
 * as written it guards the mkdir_label() call below. */
if (detect_container() <= 0)
/* Create a few directories we always want around, Note that
* really needs to stay for good, otherwise software that
* copied sd-daemon.c into their sources will misdetect
* systemd. */
/* Mode 0000: the directory carries no permission bits. */
mkdir_label("/run/systemd/inaccessible", 0000);
return 0;
}