mount-setup.c revision d1d8e5d49f3149d03ceb94d1d2f6c14e7abccb6f
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2010 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <libgen.h>
#include <assert.h>
#include <unistd.h>
#include <ftw.h>
#include "mount-setup.h"
#include "dev-setup.h"
#include "log.h"
#include "macro.h"
#include "util.h"
#include "label.h"
#include "set.h"
#include "strv.h"
#include "mkdir.h"
#include "path-util.h"
#include "missing.h"
#include "virt.h"
#include "efivars.h"
#include "smack-util.h"
#include "def.h"
/* Per-entry behaviour flags stored in MountPoint.mode. */
typedef enum MountMode {
        MNT_NONE         = 0,
        /* When set, a failed mount is logged at LOG_ERR instead of
         * LOG_DEBUG. */
        MNT_FATAL        = 1 << 0,
        /* Used by the smackfs table entry and by the per-controller
         * cgroup mounts. NOTE(review): presumably means "mount this
         * even when running inside a container" -- confirm against
         * mount_one(). */
        MNT_IN_CONTAINER = 1 << 1,
} MountMode;

/* Describes one file system to mount. */
typedef struct MountPoint {
        const char *what;            /* mount source */
        const char *where;           /* mount point path */
        const char *type;            /* file system type */
        const char *options;         /* file-system specific option string */
        unsigned long flags;         /* MS_* mount flags */
        bool (*condition_fn)(void);  /* optional; if set and it returns
                                      * false the entry is skipped */
        MountMode mode;              /* MNT_* flags, see MountMode */
} MountPoint;
/* The first three entries we might need before SELinux is up. The
 * fourth (securityfs) is needed by IMA to load a custom policy. The
 * other ones we can delay until SELinux and IMA are loaded. */
/* NOTE(review): N_EARLY_MOUNT is used by mount_setup_early() as the
 * number of leading mount_table entries to mount. Its value (5) is
 * larger than the number of entries visible in the table below --
 * confirm the table is complete. */
#define N_EARLY_MOUNT 5
/* Table of file systems to mount. Each entry lists, in order: source,
 * mount point, file system type, option string, MS_* flags, an optional
 * condition function (or NULL) and a MountMode value. */
/* NOTE(review): the preprocessor guards below do not line up with the
 * entries. The smackfs entry is opened inside #ifdef HAVE_SMACK but
 * closed outside of it, and the cgroup entry is opened unconditionally
 * but closed inside #ifdef ENABLE_EFI. Verify against the intended
 * table before building with either option disabled. */
static const MountPoint mount_table[] = {
#ifdef HAVE_SMACK
{ "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
#endif
NULL, MNT_IN_CONTAINER },
#ifdef HAVE_SMACK
#endif
{ "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
#ifdef ENABLE_EFI
is_efi_boot, MNT_NONE },
#endif
};
/* These are API file systems that might be mounted by other software.
 * We just list them here so that we know that we should ignore them.
 *
 * The paths are stored back to back in a single array, each one
 * terminated by its own NUL byte. */
/* NOTE(review): the last two category comments below have no path
 * strings after them and no terminating ';' is visible for this
 * initializer -- confirm the list is complete. */
static const char ignore_paths[] =
/* SELinux file systems */
"/selinux\0"
/* Legacy cgroup mount points */
"/cgroup\0"
/* Legacy kernel file system */
/* Container bind mounts */
/* Checks if this mount point is considered "API", and hence should be
 * ignored.
 *
 * path: the mount point path to test.
 *
 * Returns true when path equals (as decided by path_equal()) the mount
 * point of any entry in mount_table, false otherwise. */
bool mount_point_is_api(const char *path) {
        unsigned i;

        for (i = 0; i < ELEMENTSOF(mount_table); i ++)
                if (path_equal(path, mount_table[i].where))
                        return true;

        /* Not listed in the table (this is also the result for an empty
         * table). */
        return false;
}
bool mount_point_ignore(const char *path) {
const char *i;
if (path_equal(path, i))
return true;
return false;
}
/* NOTE(review): this appears to be the body of mount_one(), which the
 * callers in this file invoke as mount_one(<MountPoint pointer>, <bool>).
 * The signature line is not present here; the body refers to its
 * parameters as 'p' and 'relabel'.
 *
 * Return values visible in this body: 0 when nothing was mounted
 * (condition not met, or the path is already a mount point), 1 at the
 * end after mounting, and a negative value propagated from
 * path_is_mount_point(). */
int r;
assert(p);
/* An entry with a condition function is skipped when that function
 * reports false. */
if (p->condition_fn && !p->condition_fn())
return 0;
/* Relabel first, just in case */
/* NOTE(review): the statement that 'if (relabel)' was meant to guard
 * appears to be missing; as written it guards the
 * path_is_mount_point() call, leaving 'r' unset when relabel is
 * false. */
if (relabel)
r = path_is_mount_point(p->where, true);
/* Negative results are passed up; a positive result returns 0 without
 * mounting anything. */
if (r < 0)
return r;
if (r > 0)
return 0;
/* Skip securityfs in a container */
/* NOTE(review): the condition for this early return is not visible; as
 * written the return is unconditional and everything below it is
 * unreachable. */
return 0;
/* The access mode here doesn't really matter too much, since
* the mounted file system will take precedence anyway. */
/* NOTE(review): both branches of this if/else and the start of the
 * mount call whose result is compared with '< 0' below appear to be
 * missing. */
if (relabel)
else
log_debug("Mounting %s to %s of type %s with options %s.",
p->what,
p->where,
p->type,
p->where,
p->type,
p->flags,
p->options) < 0) {
/* MNT_FATAL entries log the failure at LOG_ERR, all others at
 * LOG_DEBUG. */
log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s at %s: %m", p->type, p->where);
}
/* Relabel again, since we now mounted something fresh here */
/* NOTE(review): the guarded statement appears to be missing here too;
 * as written 'return 1' is what 'if (relabel)' guards. */
if (relabel)
return 1;
}
/* Do a minimal mount of /proc and friends to enable the most basic
 * stuff, such as SELinux.
 *
 * Calls mount_one() with 'false' as second argument on each of the
 * first N_EARLY_MOUNT entries of mount_table. Every entry is attempted
 * regardless of earlier results.
 *
 * Returns the first non-zero value reported by mount_one(), or 0 if
 * every call returned 0. */
int mount_setup_early(void) {
        unsigned i;
        int r = 0;

        /* Never index past the end of the table, even if N_EARLY_MOUNT
         * is larger than the number of entries actually compiled in. */
        for (i = 0; i < N_EARLY_MOUNT && i < ELEMENTSOF(mount_table); i ++) {
                int j;

                j = mount_one(mount_table + i, false);
                if (r == 0)
                        r = j;
        }

        return r;
}
/* Mounts the cgroup controllers and finally remounts the tmpfs at
 * /sys/fs/cgroup read-only.
 *
 * join_controllers: may be NULL; otherwise a NULL-terminated array of
 * NULL-terminated string arrays. Each inner array is searched with
 * strv_find() for the controller currently being mounted and is
 * compacted in place by this function.
 *
 * Returns 0 on success (and also when the controller list could not be
 * opened), a negative value on failure.
 *
 * NOTE(review): several statements are not visible in this block: the
 * call that opens 'f', the declarations of 'controllers', 'options'
 * and 'where', the parsing of each input line, and the declaration of
 * 'controller' used by the second loop. The braces below do not balance
 * either. Verify against the complete function before changing any
 * code. */
int mount_cgroup_controllers(char ***join_controllers) {
_cleanup_fclose_ FILE *f;
int r;
/* Mount all available cgroup controllers that are built into the kernel. */
/* Failing to open the controller list is logged but deliberately
 * reported as success (0). */
if (!f) {
log_error("Failed to enumerate cgroup controllers: %m");
return 0;
}
if (!controllers)
return log_oom();
/* Ignore the header line */
/* First loop: runs until end of file on 'f'; controllers that are not
 * enabled are skipped, and a failure to add to the set is returned. */
for (;;) {
char *controller;
int enabled = 0;
if (feof(f))
break;
return -EIO;
}
if (!enabled) {
continue;
}
if (r < 0) {
log_error("Failed to add controller to set.");
return r;
}
}
/* Second loop: mount one cgroup file system per iteration until there
 * is no controller left. */
for (;;) {
MountPoint p = {
.what = "cgroup",
.type = "cgroup",
.mode = MNT_IN_CONTAINER,
};
char ***k = NULL;
if (!controller)
break;
/* Look for the group in join_controllers that names this controller;
 * afterwards *k is that group, or NULL if none matched. */
if (join_controllers)
for (k = join_controllers; *k; k++)
if (strv_find(*k, controller))
break;
if (k && *k) {
char **i, **j;
/* Compact the group in place: keep this controller and every other
 * member that could still be removed from 'controllers'; members no
 * longer in the set are freed and dropped from the group. */
for (i = *k, j = *k; *i; i++) {
if (!streq(*i, controller)) {
_cleanup_free_ char *t;
t = set_remove(controllers, *i);
if (!t) {
free(*i);
continue;
}
}
*(j++) = *i;
}
*j = NULL;
if (!options)
return log_oom();
} else {
controller = NULL;
}
if (!where)
return log_oom();
r = mount_one(&p, true);
if (r < 0)
return r;
/* Only when mount_one() returned a positive value for a joined group
 * is each member of the group processed further; the error message
 * below indicates that a symlink is created per member. */
if (r > 0 && k && *k) {
char **i;
for (i = *k; *i; i++) {
_cleanup_free_ char *t = NULL;
if (!t)
return log_oom();
log_error("Failed to create symlink %s: %m", t);
return -errno;
}
}
}
}
/* Now that we mounted everything, let's make the tmpfs the
 * cgroup file systems are mounted into read-only. */
/* The result of this remount is not checked. */
mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
return 0;
}
/* File tree walk callback. The visible return statements yield
 * FTW_CONTINUE or FTW_SKIP_SUBTREE.
 *
 * NOTE(review): the rest of the parameter list, the opening brace and
 * the conditions selecting between the return statements are not
 * visible here, and the second comment below has lost its opening
 * line. Verify against the complete function before changing any
 * code. */
static int nftw_cb(
const char *fpath,
int tflag,
/* No need to label /dev twice in a row... */
return FTW_CONTINUE;
* dynamically relabel its contents at boot... */
return FTW_SKIP_SUBTREE;
return FTW_CONTINUE;
};
/* Mounts every entry of mount_table and performs the follow-up setup
 * steps described by the comments below.
 *
 * loaded_policy: when true, the block that reports relabelling of /dev
 * and /run is entered.
 *
 * Returns 0 on success, or the negative value of the first mount_one()
 * call that failed.
 *
 * NOTE(review): several statements are not visible in this block (the
 * relabelling itself and the remaining arguments of its log_info()
 * call, the symlink creation, the call that changes mount propagation).
 * Verify against the complete function before changing any code. */
int mount_setup(bool loaded_policy) {
int r;
unsigned i;
/* Stop at the first entry for which mount_one() reports an error. */
for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
r = mount_one(mount_table + i, true);
if (r < 0)
return r;
}
/* Nodes in devtmpfs and /run need to be manually updated for
 * the appropriate labels, after mounting. The other virtual
 * API file systems like /sys and /proc do not need that, they
 * use the same label for all their files. */
if (loaded_policy) {
char timespan[FORMAT_TIMESPAN_MAX];
log_info("Relabelled /dev and /run in %s.",
}
/* Create a few default symlinks, which are normally created
 * by udevd, but some scripts might need them before we start
 * udevd. */
/* Mark the root directory as shared in regards to mount
 * propagation. The kernel defaults to "private", but we think
 * it makes more sense to have a default of "shared" so that
 * nspawn and the container tools work out of the box. If
 * specific setups need other settings they can reset the
 * propagation mode to private if needed. */
/* NOTE(review): as written, the warning is logged whenever
 * detect_container() returns <= 0; the call whose failure it reports
 * appears to be missing from the condition. */
if (detect_container(NULL) <= 0)
log_warning("Failed to set up the root directory for shared mount propagation: %m");
/* Create a few directories we always want around. Note that this
 * really needs to stay for good, otherwise software that
 * copied sd-daemon.c into their sources will misdetect
 * systemd. */
mkdir_label("/run/systemd/inaccessible", 0000);
return 0;
}