nspawn.c revision 189d5bac5c45a6a735489541e285dec8bfc1d38d
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen This file is part of systemd.
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen Copyright 2010 Lennart Poettering
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen systemd is free software; you can redistribute it and/or modify it
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen under the terms of the GNU Lesser General Public License as published by
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen the Free Software Foundation; either version 2.1 of the License, or
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen (at your option) any later version.
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen systemd is distributed in the hope that it will be useful, but
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen WITHOUT ANY WARRANTY; without even the implied warranty of
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen Lesser General Public License for more details.
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen You should have received a copy of the GNU Lesser General Public License
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen along with systemd; If not, see <http://www.gnu.org/licenses/>.
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersenstatic const char *arg_selinux_apifs_context = NULL;
1a436809498faf6486815baa0338fb6b8e5def07Tom Gundersenstatic bool arg_private_network = false;
1a436809498faf6486815baa0338fb6b8e5def07Tom Gundersenstatic bool arg_read_only = false;
6a0a2f860f1ca4a10932da3b41dbc3c0139757ccTom Gundersenstatic bool arg_boot = false;
cb9fc36a1211967e8c58b0502a26c42552ac8060Lennart Poetteringstatic bool arg_ephemeral = false;
5be4d38e31281727b6f45ae869136bb01a1f7790Tom Gundersenstatic LinkJournal arg_link_journal = LINK_AUTO;
bcb7a07e0a785bda1eed658e984ff6b4a11cba9aTom Gundersenstatic bool arg_link_journal_try = false;
6ae115c1fe95611b39d2f20cfcea3d385429f59eTom Gundersenstatic unsigned arg_n_custom_mounts = 0;
6ae115c1fe95611b39d2f20cfcea3d385429f59eTom Gundersenstatic bool arg_quiet = false;
67272d157a35e5cda4e5c904eafdcc23d20541d1Tom Gundersenstatic bool arg_share_system = false;
6192b846ca0d15602e94ddb5da4420b7c60d64a5Tom Gundersenstatic bool arg_register = true;
06f021a8048583d66202e3ac5cd0a12386d33ac2Tom Gundersenstatic bool arg_keep_unit = false;
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersenstatic bool arg_network_veth = false;
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersenstatic unsigned long arg_personality = PERSONALITY_INVALID;
16aa63a00b5b1db23a9c0b8de350ebf482d90cd0Tom Gundersenstatic VolatileMode arg_volatile_mode = VOLATILE_NO;
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersenstatic uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
5c1d3fc93d91384bbac29adf01074fa4375317eaUmut Tezduyar Lindskogstatic bool arg_userns = false;
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersenstatic bool arg_unified_cgroup_hierarchy = false;
5d3de3fe9cc452f1bfe3c2dcafecbd7f904da4dcLennart Poetteringstatic void help(void) {
c081882f07617e56bcbce54105068137e4e0bb52Susant Sahani printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen "Spawn a minimal namespace container for debugging, testing and building.\n\n"
fd6d906cf46ecef45ebbb5a361e1dc436f031d7aLennart Poettering " -h --help Show this help\n"
5a8bcb674f71a20e95df55319b34c556638378ceLennart Poettering " --version Print version string\n"
3d3d425547a3f38473fcf8737b85dfebb630479dTom Gundersen " -q --quiet Do not show status information\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " -D --directory=PATH Root directory for the container\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " --template=PATH Initialize root directory from template directory,\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " if missing\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " -x --ephemeral Run container with snapshot of root directory, and\n"
16aa63a00b5b1db23a9c0b8de350ebf482d90cd0Tom Gundersen " remove it after exit\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " -i --image=PATH File system device or disk image for the container\n"
0dd25fb9f005d8ab7ac4bc10a609d00569f8c56aLennart Poettering " -b --boot Boot up full system (i.e. invoke init)\n"
6ae115c1fe95611b39d2f20cfcea3d385429f59eTom Gundersen " -u --user=USER Run the command under specified user or uid\n"
9e7e440835c43d81ffdbc299d2c07daaa641ed50Tom Gundersen " -M --machine=NAME Set the machine name for the container\n"
5c1d3fc93d91384bbac29adf01074fa4375317eaUmut Tezduyar Lindskog " --uuid=UUID Set a specific machine UUID for the container\n"
5c1d3fc93d91384bbac29adf01074fa4375317eaUmut Tezduyar Lindskog " -S --slice=SLICE Place the container in the specified slice\n"
28cc555d8504c9429776aedbbe1fee7101258578Dan Williams " --property=NAME=VALUE Set scope unit property\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " --private-users[=UIDBASE[:NUIDS]]\n"
5d3de3fe9cc452f1bfe3c2dcafecbd7f904da4dcLennart Poettering " Run within user namespace\n"
5d3de3fe9cc452f1bfe3c2dcafecbd7f904da4dcLennart Poettering " --private-network Disable network in container\n"
9e7e440835c43d81ffdbc299d2c07daaa641ed50Tom Gundersen " --network-interface=INTERFACE\n"
46b0c76e2c355c0d0cc4792abb98cde07b28bc53Emil Renner Berthing " Assign an existing network interface to the\n"
6ae115c1fe95611b39d2f20cfcea3d385429f59eTom Gundersen " container\n"
3d3d425547a3f38473fcf8737b85dfebb630479dTom Gundersen " --network-macvlan=INTERFACE\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " Create a macvlan network interface based on an\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " existing network interface to the container\n"
11bf3cced13c885ca215c108cb0bdb7a148520d6Lennart Poettering " --network-ipvlan=INTERFACE\n"
11bf3cced13c885ca215c108cb0bdb7a148520d6Lennart Poettering " Create a ipvlan network interface based on an\n"
11bf3cced13c885ca215c108cb0bdb7a148520d6Lennart Poettering " existing network interface to the container\n"
0dd25fb9f005d8ab7ac4bc10a609d00569f8c56aLennart Poettering " -n --network-veth Add a virtual ethernet connection between host\n"
11bf3cced13c885ca215c108cb0bdb7a148520d6Lennart Poettering " and container\n"
11bf3cced13c885ca215c108cb0bdb7a148520d6Lennart Poettering " --network-bridge=INTERFACE\n"
11bf3cced13c885ca215c108cb0bdb7a148520d6Lennart Poettering " Add a virtual ethernet connection between host\n"
11bf3cced13c885ca215c108cb0bdb7a148520d6Lennart Poettering " and container and add it to an existing bridge on\n"
11bf3cced13c885ca215c108cb0bdb7a148520d6Lennart Poettering " -p --port=[PROTOCOL:]HOSTPORT[:CONTAINERPORT]\n"
11bf3cced13c885ca215c108cb0bdb7a148520d6Lennart Poettering " Expose a container IP port on the host\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " -Z --selinux-context=SECLABEL\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " Set the SELinux security context to be used by\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " processes in the container\n"
9c0a72f961e00b0447767973e7117e131bea5b5dTom Gundersen " -L --selinux-apifs-context=SECLABEL\n"
1346b1f0388f4100bb3c2a2bb23bc881769c020cTom Gundersen " Set the SELinux security context to be used by\n"
9c0a72f961e00b0447767973e7117e131bea5b5dTom Gundersen " API/tmpfs file systems in the container\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " --capability=CAP In addition to the default, retain specified\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " capability\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " --drop-capability=CAP Drop the specified capability from the default set\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " --kill-signal=SIGNAL Select signal to use for shutting down PID 1\n"
6a24f1484fcc1f59f89617afbe4282667a358eabTom Gundersen " --link-journal=MODE Link up guest journal, one of no, auto, guest, host,\n"
6a24f1484fcc1f59f89617afbe4282667a358eabTom Gundersen " try-guest, try-host\n"
bbf7c04821a71fec67eaf0e7a34d17afc5913c13Tom Gundersen " -j Equivalent to --link-journal=try-guest\n"
e331e24649213f2e093e16e4d3d64ee823dfc375Tom Gundersen " --read-only Mount the root directory read-only\n"
bbf7c04821a71fec67eaf0e7a34d17afc5913c13Tom Gundersen " --bind=PATH[:PATH[:OPTIONS]]\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " Bind mount a file or directory from the host into\n"
52433f6b65eccd1c54606dde999610640f3458acTom Gundersen " the container\n"
dbffab87f1504abc9f189dd253111693c99fbd9aTom Gundersen " --bind-ro=PATH[:PATH[:OPTIONS]\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " Similar, but creates a read-only bind mount\n"
11bf3cced13c885ca215c108cb0bdb7a148520d6Lennart Poettering " --tmpfs=PATH:[OPTIONS] Mount an empty tmpfs to the specified directory\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " --overlay=PATH[:PATH...]:PATH\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " Create an overlay mount from the host to \n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " the container\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " --overlay-ro=PATH[:PATH...]:PATH\n"
2ad8416dd057e7e3185169609ca3006e7649f576Zbigniew Jędrzejewski-Szmek " Similar, but creates a read-only overlay mount\n"
2ad8416dd057e7e3185169609ca3006e7649f576Zbigniew Jędrzejewski-Szmek " --setenv=NAME=VALUE Pass an environment variable to PID 1\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " --share-system Share system namespaces with host\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " --register=BOOLEAN Register container as machine\n"
e331e24649213f2e093e16e4d3d64ee823dfc375Tom Gundersen " --keep-unit Do not register a scope for the machine, reuse\n"
e331e24649213f2e093e16e4d3d64ee823dfc375Tom Gundersen " the service unit nspawn is running in\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " --volatile[=MODE] Run the system in volatile mode\n"
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen " --settings=BOOLEAN Load additional settings from .nspawn file\n"
02b59d57e0c08231645120077f651151f5bb2babTom Gundersenstatic int custom_mounts_prepare(void) {
45af44d47da6933b260c734ad9ff721f63f80a4dTom Gundersen /* Ensure the mounts are applied prefix first. */
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen qsort_safe(arg_custom_mounts, arg_n_custom_mounts, sizeof(CustomMount), custom_mount_compare);
bbf7c04821a71fec67eaf0e7a34d17afc5913c13Tom Gundersen /* Allocate working directories for the overlay file systems that need it */
3bef724f7e7f7eaca69881548b06e221b77d7031Tom Gundersen for (i = 0; i < arg_n_custom_mounts; i++) {
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen if (arg_userns && arg_uid_shift == UID_INVALID && path_equal(m->destination, "/")) {
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen log_error("--private-users with automatic UID shift may not be combined with custom root mounts.");
505f8da7325591defe5f751f328bd26915267602Tom Gundersen r = tempfn_random(m->source, NULL, &m->work_dir);
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen return log_error_errno(r, "Failed to generate work directory from %s: %m", m->source);
6192b846ca0d15602e94ddb5da4420b7c60d64a5Tom Gundersenstatic int set_sanitized_path(char **b, const char *path) {
7951dea20911969287878e6897b3eca348721adeSusant Sahanistatic int detect_unified_cgroup_hierarchy(void) {
7951dea20911969287878e6897b3eca348721adeSusant Sahani const char *e;
7951dea20911969287878e6897b3eca348721adeSusant Sahani /* Allow the user to control whether the unified hierarchy is used */
7951dea20911969287878e6897b3eca348721adeSusant Sahani return log_error_errno(r, "Failed to parse $UNIFIED_CGROUP_HIERARCHY.");
85a8eeee36b57c1ab382b0225fa9a87525bbeee9Susant Sahani /* Otherwise inherit the default from the host system */
85a8eeee36b57c1ab382b0225fa9a87525bbeee9Susant Sahani return log_error_errno(r, "Failed to determine whether the unified cgroups hierarchy is used: %m");
c0dda18697e0994272c0c9616d36f6777b60e2c7Tom Gundersenstatic int parse_argv(int argc, char *argv[]) {
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen { "version", no_argument, NULL, ARG_VERSION },
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen { "directory", required_argument, NULL, 'D' },
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen { "template", required_argument, NULL, ARG_TEMPLATE },
71a6151083d842b2f5bf04e50239f0bf85d34d2eTom Gundersen { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen { "uuid", required_argument, NULL, ARG_UUID },
eb0ea358b688a6f83ff305c6b825c61f12b6dcb8Tom Gundersen { "read-only", no_argument, NULL, ARG_READ_ONLY },
eb0ea358b688a6f83ff305c6b825c61f12b6dcb8Tom Gundersen { "capability", required_argument, NULL, ARG_CAPABILITY },
eb0ea358b688a6f83ff305c6b825c61f12b6dcb8Tom Gundersen { "drop-capability", required_argument, NULL, ARG_DROP_CAPABILITY },
eb0ea358b688a6f83ff305c6b825c61f12b6dcb8Tom Gundersen { "link-journal", required_argument, NULL, ARG_LINK_JOURNAL },
6ae115c1fe95611b39d2f20cfcea3d385429f59eTom Gundersen { "bind", required_argument, NULL, ARG_BIND },
6ae115c1fe95611b39d2f20cfcea3d385429f59eTom Gundersen { "bind-ro", required_argument, NULL, ARG_BIND_RO },
6ae115c1fe95611b39d2f20cfcea3d385429f59eTom Gundersen { "tmpfs", required_argument, NULL, ARG_TMPFS },
6ae115c1fe95611b39d2f20cfcea3d385429f59eTom Gundersen { "overlay", required_argument, NULL, ARG_OVERLAY },
b98b483bac585af754e8a22ea890db8486905d8aAlin Rauta { "overlay-ro", required_argument, NULL, ARG_OVERLAY_RO },
b98b483bac585af754e8a22ea890db8486905d8aAlin Rauta { "setenv", required_argument, NULL, ARG_SETENV },
b98b483bac585af754e8a22ea890db8486905d8aAlin Rauta { "selinux-context", required_argument, NULL, 'Z' },
b98b483bac585af754e8a22ea890db8486905d8aAlin Rauta { "selinux-apifs-context", required_argument, NULL, 'L' },
b98b483bac585af754e8a22ea890db8486905d8aAlin Rauta { "share-system", no_argument, NULL, ARG_SHARE_SYSTEM },
b98b483bac585af754e8a22ea890db8486905d8aAlin Rauta { "register", required_argument, NULL, ARG_REGISTER },
b98b483bac585af754e8a22ea890db8486905d8aAlin Rauta { "keep-unit", no_argument, NULL, ARG_KEEP_UNIT },
b98b483bac585af754e8a22ea890db8486905d8aAlin Rauta { "network-interface", required_argument, NULL, ARG_NETWORK_INTERFACE },
b98b483bac585af754e8a22ea890db8486905d8aAlin Rauta { "network-macvlan", required_argument, NULL, ARG_NETWORK_MACVLAN },
b98b483bac585af754e8a22ea890db8486905d8aAlin Rauta { "network-ipvlan", required_argument, NULL, ARG_NETWORK_IPVLAN },
b98b483bac585af754e8a22ea890db8486905d8aAlin Rauta { "network-bridge", required_argument, NULL, ARG_NETWORK_BRIDGE },
b98b483bac585af754e8a22ea890db8486905d8aAlin Rauta { "personality", required_argument, NULL, ARG_PERSONALITY },
ed942a9eb22d50f667909ad6184b45015d28d054Tom Gundersen { "volatile", optional_argument, NULL, ARG_VOLATILE },
ed942a9eb22d50f667909ad6184b45015d28d054Tom Gundersen { "property", required_argument, NULL, ARG_PROPERTY },
ed942a9eb22d50f667909ad6184b45015d28d054Tom Gundersen { "private-users", optional_argument, NULL, ARG_PRIVATE_USERS },
ed942a9eb22d50f667909ad6184b45015d28d054Tom Gundersen { "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL },
bd8f65387673e29f46136a4ed172097035002c23Tom Gundersen { "settings", required_argument, NULL, ARG_SETTINGS },
bd8f65387673e29f46136a4ed172097035002c23Tom Gundersen bool mask_all_settings = false, mask_no_settings = false;
0dd25fb9f005d8ab7ac4bc10a609d00569f8c56aLennart Poettering while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:qi:xp:n", options, NULL)) >= 0)
e331e24649213f2e093e16e4d3d64ee823dfc375Tom Gundersen r = set_sanitized_path(&arg_directory, optarg);
case ARG_TEMPLATE:
arg_ephemeral = true;
return log_oom();
case ARG_NETWORK_BRIDGE:
return log_oom();
arg_network_veth = true;
arg_private_network = true;
case ARG_NETWORK_INTERFACE:
return log_oom();
arg_private_network = true;
case ARG_NETWORK_MACVLAN:
return log_oom();
arg_private_network = true;
case ARG_NETWORK_IPVLAN:
return log_oom();
case ARG_PRIVATE_NETWORK:
arg_private_network = true;
arg_boot = true;
case ARG_UUID:
return -EINVAL;
return log_oom();
case ARG_READ_ONLY:
arg_read_only = true;
case ARG_CAPABILITY:
case ARG_DROP_CAPABILITY: {
_cleanup_free_ char *t;
return log_oom();
if (c == ARG_CAPABILITY)
int cap;
if (cap < 0) {
return -EINVAL;
if (c == ARG_CAPABILITY)
arg_link_journal_try = true;
case ARG_LINK_JOURNAL:
arg_link_journal_try = false;
arg_link_journal_try = false;
arg_link_journal_try = false;
arg_link_journal_try = false;
arg_link_journal_try = true;
arg_link_journal_try = true;
return -EINVAL;
case ARG_BIND:
case ARG_BIND_RO:
case ARG_TMPFS:
case ARG_OVERLAY:
case ARG_OVERLAY_RO: {
CustomMount *m;
if (r == -ENOMEM)
return log_oom();
if (!path_is_absolute(*i)) {
return -EINVAL;
return -EINVAL;
if (!destination)
return log_oom();
return log_oom();
case ARG_SETENV: {
return -EINVAL;
return log_oom();
arg_setenv = n;
arg_quiet = true;
case ARG_SHARE_SYSTEM:
arg_share_system = true;
case ARG_REGISTER:
arg_register = r;
case ARG_KEEP_UNIT:
arg_keep_unit = true;
case ARG_PERSONALITY:
return -EINVAL;
case ARG_VOLATILE:
if (!optarg)
VolatileMode m;
return -EINVAL;
arg_volatile_mode = m;
if (r == -EEXIST)
case ARG_PROPERTY:
return log_oom();
case ARG_PRIVATE_USERS:
if (optarg) {
if (range) {
if (!buffer)
return log_oom();
range++;
return -EINVAL;
return -EINVAL;
arg_userns = true;
case ARG_KILL_SIGNAL:
if (arg_kill_signal < 0) {
return -EINVAL;
case ARG_SETTINGS:
mask_all_settings = false;
mask_no_settings = false;
arg_settings_trusted = true;
mask_all_settings = false;
mask_no_settings = true;
mask_all_settings = false;
mask_no_settings = false;
mask_all_settings = true;
mask_no_settings = false;
arg_settings_trusted = false;
return -EINVAL;
if (arg_share_system)
arg_register = false;
return -EINVAL;
return -EINVAL;
return -EINVAL;
return -EINVAL;
return -EINVAL;
return -EINVAL;
return -EINVAL;
return -EINVAL;
return log_error_errno(EOPNOTSUPP, "--private-users= is not supported, kernel compiled without user namespace support.");
if (!arg_parameters)
return log_oom();
if (mask_no_settings)
arg_settings_mask = 0;
if (mask_all_settings)
static int verify_arguments(void) {
log_error("Cannot combine --read-only with --volatile. Note that --volatile already implies a read-only base hierarchy.");
return -EINVAL;
return -EINVAL;
assert(p);
if (!arg_userns)
return -EOVERFLOW;
return -EOVERFLOW;
return -errno;
return -errno;
log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
if (y && streq(y, z))
if (arg_private_network)
/* Fix resolv.conf, if possible */
assert(s);
if (arg_share_system)
static const char devnodes[] =
u = umask(0000);
return -EIO;
#ifdef HAVE_SELINUX
if (!options)
return log_oom();
if (userns_lchown(p, 0, 0) < 0)
if (userns_lchown(p, 0, 0) < 0)
if (userns_lchown(p, 0, 0) < 0)
const char *to;
u = umask(0000);
int fd, r;
u = umask(0000);
if (fd < 0)
assert(m);
static int setup_hostname(void) {
if (arg_share_system)
return -errno;
const char *etc_machine_id, *p, *q;
char *id;
if (arg_ephemeral)
return -EEXIST;
if (path_is_mount_point(p, 0) > 0) {
return -EEXIST;
if (path_is_mount_point(q, 0) > 0) {
return -EEXIST;
r = readlink_and_make_absolute(p, &d);
path_equal(d, q)) {
if (unlink(p) < 0)
} else if (r == -EINVAL) {
rmdir(p) < 0) {
return -errno;
} else if (r != -ENOENT) {
if (symlink(q, p) < 0) {
if (arg_link_journal_try) {
return -errno;
if (arg_link_journal_try) {
if (dir_is_empty(q) == 0)
static int drop_capabilities(void) {
static int reset_audit_loginuid(void) {
if (arg_share_system)
if (r == -ENOENT)
static int setup_seccomp(void) {
#ifdef HAVE_SECCOMP
int syscall_num;
} blacklist[] = {
if (!seccomp)
return log_oom();
goto finish;
if (r == -EFAULT)
goto finish;
r = seccomp_rule_add(
goto finish;
goto finish;
if (r == -EINVAL) {
log_debug_errno(r, "Kernel is probably not configured with CONFIG_SECCOMP. Disabling seccomp audit filter: %m");
goto finish;
goto finish;
int r, nr;
if (fd < 0)
return log_oom();
*device_path = p;
r = fd;
return -EINVAL;
if (control < 0)
if (nr < 0)
return log_oom();
if (loop < 0)
if (arg_read_only)
r = loop;
#define PARTITION_TABLE_BLURB \
static int dissect_image(
int fd,
bool *secondary) {
#ifdef HAVE_BLKID
#ifdef GPT_ROOT_NATIVE
#ifdef GPT_ROOT_SECONDARY
_cleanup_free_ char *home = NULL, *root = NULL, *secondary_root = NULL, *srv = NULL, *generic = NULL;
b = blkid_new_probe();
return log_oom();
errno = 0;
if (errno == 0)
return log_oom();
return -errno;
errno = 0;
r = blkid_do_safeprobe(b);
return -EINVAL;
if (errno == 0)
return -errno;
return -EINVAL;
errno = 0;
if (!pl) {
if (errno == 0)
return log_oom();
return -errno;
if (!udev)
return log_oom();
return log_oom();
return -ENXIO;
return log_oom();
r = udev_enumerate_add_match_parent(e, d);
return log_oom();
r = udev_enumerate_scan_devices(e);
return -EIO;
r = -errno;
if (r >= 0 || r != -EBUSY)
e = udev_enumerate_unref(e);
const char *node;
unsigned long long flags;
int nr;
errno = 0;
if (!errno)
return -errno;
if (!node)
if (!pp)
if (nr < 0)
if (is_gpt) {
const char *stype;
if (!stype)
return log_oom();
return log_oom();
#ifdef GPT_ROOT_NATIVE
return log_oom();
#ifdef GPT_ROOT_SECONDARY
return log_oom();
if (generic)
multiple_generic = true;
return log_oom();
} else if (is_mbr) {
int type;
if (generic)
multiple_generic = true;
generic_rw = true;
return log_oom();
if (root) {
*secondary = false;
} else if (secondary_root) {
*secondary = true;
} else if (generic) {
if (multiple_generic) {
return -EINVAL;
*secondary = false;
return -EINVAL;
if (home) {
if (srv) {
return -EOPNOTSUPP;
#ifdef HAVE_BLKID
const char *fstype, *p;
if (arg_read_only)
rw = false;
if (directory)
p = where;
errno = 0;
if (errno == 0)
return log_oom();
return -errno;
errno = 0;
r = blkid_do_safeprobe(b);
return -EINVAL;
if (errno == 0)
return -errno;
errno = 0;
if (errno == 0)
return -errno;
return -EOPNOTSUPP;
return -EOPNOTSUPP;
static int mount_devices(
const char *where,
if (root_device) {
if (home_device) {
if (srv_device) {
if (nr < 0)
if (control < 0) {
case CLD_EXITED:
log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s failed with error code %i.", arg_machine, status.si_status);
case CLD_KILLED:
case CLD_DUMPED:
log_error("Container %s terminated by signal %s.", arg_machine, signal_to_string(status.si_status));
return -EIO;
return -EIO;
static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
if (pid > 0) {
static int determine_names(void) {
if (!arg_directory)
return log_oom();
if (arg_machine) {
return -ENOENT;
if (!arg_ephemeral)
return -EINVAL;
if (!arg_machine) {
if (!arg_machine)
return log_oom();
return -EINVAL;
if (arg_ephemeral) {
return log_oom();
arg_machine = b;
if (!arg_userns) {
arg_uid_shift = 0;
return -EINVAL;
return -EINVAL;
log_info("Using user namespaces with base " UID_FMT " and range " UID_FMT ".", arg_uid_shift, arg_uid_range);
static int inner_child(
const char *directory,
bool secondary,
int kmsg_socket,
int rtnl_socket,
const char *envp[] = {
if (arg_userns) {
return -ESRCH;
return -ESRCH;
r = reset_uid_gid();
if (setsid() < 0)
if (arg_private_network)
if (arg_expose_ports) {
if (drop_capabilities() < 0)
} else if (secondary) {
#ifdef HAVE_SELINUX
if (arg_selinux_context)
n_env ++;
return log_oom();
if (asprintf((char**)(envp + n_env++), "container_uuid=%s", id128_format_as_uuid(arg_uuid, as_uuid)) < 0)
return log_oom();
return log_oom();
if (!env_use)
return log_oom();
return -ESRCH;
log_close();
if (arg_boot) {
size_t m;
(void) log_open();
static int outer_child(
const char *directory,
const char *console,
bool interactive,
bool secondary,
int pid_socket,
int kmsg_socket,
int rtnl_socket,
int uid_shift_socket,
ssize_t l;
if (interactive) {
if (r != STDIN_FILENO) {
safe_close(r);
r = -EINVAL;
r = reset_audit_loginuid();
if (arg_userns) {
if (l != sizeof(arg_uid_shift)) {
return -EIO;
r = setup_volatile(directory, arg_volatile_mode, arg_userns, arg_uid_shift, arg_uid_range, arg_selinux_context);
r = setup_volatile_state(directory, arg_volatile_mode, arg_userns, arg_uid_shift, arg_uid_range, arg_selinux_context);
if (arg_read_only) {
r = setup_seccomp();
r = mount_custom(directory, arg_custom_mounts, arg_n_custom_mounts, arg_userns, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context);
r = mount_cgroups(directory, arg_unified_cgroup_hierarchy, arg_userns, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context);
NULL);
if (pid < 0)
if (pid == 0) {
if (l != sizeof(pid)) {
return -EIO;
char uid_map[strlen("/proc//uid_map") + DECIMAL_STR_MAX(uid_t) + 1], line[DECIMAL_STR_MAX(uid_t)*3+3+1];
static int load_settings(void) {
const char *fn, *i;
return log_oom();
j = NULL;
if (arg_settings_trusted < 0)
arg_settings_trusted = true;
if (arg_image) {
return log_oom();
} else if (arg_directory) {
return log_oom();
if (arg_settings_trusted < 0)
arg_settings_trusted = false;
if (!arg_settings_trusted)
if (!arg_settings_trusted)
log_warning("Ignoring TemporaryFileSystem=, Bind= and BindReadOnly= settings, file %s is not trusted.", p);
if (!arg_settings_trusted)
if (!arg_settings_trusted)
_cleanup_free_ char *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL, *console = NULL;
_cleanup_release_lock_file_ LockFile tree_global_lock = LOCK_FILE_INIT, tree_local_lock = LOCK_FILE_INIT;
bool interactive;
log_open();
goto finish;
if (geteuid() != 0) {
r = -EPERM;
goto finish;
r = determine_names();
goto finish;
r = load_settings();
goto finish;
r = verify_arguments();
goto finish;
if (n_fd_passed > 0) {
goto finish;
if (arg_directory) {
r = -EINVAL;
goto finish;
if (arg_ephemeral) {
goto finish;
goto finish;
r = image_path_lock(np, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, &tree_global_lock, &tree_local_lock);
goto finish;
r = btrfs_subvol_snapshot(arg_directory, np, (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE);
goto finish;
remove_subvol = true;
r = image_path_lock(arg_directory, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, &tree_global_lock, &tree_local_lock);
if (r == -EBUSY) {
goto finish;
if (arg_template) {
r = btrfs_subvol_snapshot(arg_template, arg_directory, (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE);
if (r == -EEXIST) {
if (!arg_quiet)
log_info("Directory %s already exists, not populating from template %s.", arg_directory, arg_template);
goto finish;
if (!arg_quiet)
if (arg_boot) {
log_error("Directory %s doesn't look like an OS root directory (os-release file is missing). Refusing.", arg_directory);
r = -EINVAL;
goto finish;
log_error("Directory %s lacks the binary to execute or doesn't look like a binary tree. Refusing.", arg_directory);
r = -EINVAL;
goto finish;
r = image_path_lock(arg_image, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, &tree_global_lock, &tree_local_lock);
if (r == -EBUSY) {
goto finish;
goto finish;
r = -errno;
goto finish;
if (!arg_directory) {
r = log_oom();
goto finish;
if (image_fd < 0) {
r = image_fd;
goto finish;
&secondary);
goto finish;
r = custom_mounts_prepare();
goto finish;
if (master < 0) {
goto finish;
goto finish;
goto finish;
if (!arg_quiet)
goto finish;
_cleanup_close_pair_ int kmsg_socket_pair[2] = { -1, -1 }, rtnl_socket_pair[2] = { -1, -1 }, pid_socket_pair[2] = { -1, -1 },
int ifi = 0;
ssize_t l;
char last_char = 0;
goto finish;
goto finish;
goto finish;
goto finish;
if (arg_userns)
goto finish;
goto finish;
goto finish;
if (pid < 0) {
r = log_error_errno(errno, "clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
goto finish;
if (pid == 0) {
(void) reset_all_signal_handlers();
(void) reset_signal_mask();
fds);
goto finish;
r = -EIO;
goto finish;
pid = 0;
goto finish;
if (l != sizeof(pid)) {
r = EIO;
goto finish;
if (arg_userns) {
r = -ESRCH;
goto finish;
goto finish;
if (l != sizeof(arg_uid_shift)) {
r = EIO;
goto finish;
goto finish;
if (arg_private_network) {
goto finish;
if (arg_network_veth) {
goto finish;
ifi = r;
if (arg_network_bridge) {
goto finish;
ifi = r;
goto finish;
goto finish;
if (arg_register) {
r = register_machine(
pid,
ifi,
goto finish;
goto finish;
if (arg_keep_unit) {
goto finish;
goto finish;
goto finish;
r = -ESRCH;
goto finish;
sd_notifyf(false,
goto finish;
if (arg_kill_signal > 0) {
if (arg_expose_ports) {
goto finish;
goto finish;
goto finish;
pid = 0;
goto finish;
ret = r;
if (arg_keep_unit) {
sd_notify(false,
if (pid > 0)
if (master >= 0)
if (arg_machine) {