cgroup-util.c revision a0ab566574303be1ca12cdb334f284cfd407caa5
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2010 Lennart Poettering
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <errno.h>
#include <unistd.h>
#include <signal.h>
#include <string.h>
#include <stdlib.h>
#include <dirent.h>
#include <ftw.h>
#include "cgroup-util.h"
#include "log.h"
#include "set.h"
#include "macro.h"
#include "util.h"
#include "path-util.h"
#include "strv.h"
#include "unit-name.h"
#include "fileio.h"
FILE *f;
int r;
if (r < 0)
return r;
if (!f)
return -errno;
*_f = f;
return 0;
}
FILE *f;
int r;
if (r < 0)
return r;
if (!f)
return -errno;
*_f = f;
return 0;
}
unsigned long ul;
/* Note that the cgroup.procs might contain duplicates! See
* cgroups.txt for details. */
assert(f);
errno = 0;
if (feof(f))
return 0;
}
if (ul <= 0)
return -EIO;
return 1;
}
int r;
DIR *d;
/* This is not recursive! */
if (r < 0)
return r;
if (!d)
return -errno;
*_d = d;
return 0;
}
assert(d);
char *b;
continue;
continue;
if (!b)
return -ENOMEM;
*fn = b;
return 1;
}
return 0;
}
_cleanup_free_ char *p = NULL;
int r;
if (r < 0)
return r;
if (honour_sticky) {
char *tasks;
/* If the sticky bit is set don't remove the directory */
if (!tasks)
return -ENOMEM;
r = file_is_priv_sticky(tasks);
if (r > 0)
return 0;
}
r = rmdir(p);
return -errno;
return 0;
}
/* Kills the processes of a cgroup by walking its task list (see the
 * inline comments). Pids are pulled with cg_read_pid() inside a
 * do/while loop that runs again whenever a pass cleared 'done'.
 *
 * Returns 'ret' when the loop finishes, or a negative value taken
 * from 'r' / -ENOMEM on the visible early exits.
 *
 * NOTE(review): this excerpt appears truncated. 'f' and 'pid' are used
 * without a visible declaration, 'r' is tested before any visible
 * assignment, and the statement that delivers 'sig' is not shown.
 * Confirm against the complete file. */
int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
bool done = false;
int r, ret = 0;
/* This goes through the tasks list and kills them all. This
* is repeated until no further processes are added to the
* tasks list, to properly handle forking processes */
/* NOTE(review): presumably a fallback set is allocated between the
 * two identical checks below; that line is not visible here. */
if (!s) {
if (!s)
return -ENOMEM;
}
do {
/* Assume this is the last pass until the inner loop says otherwise */
done = true;
if (r < 0) {
return r;
return ret;
}
/* cg_read_pid() yields > 0 per pid read; the loop stops on 0 or error */
while ((r = cg_read_pid(f, &pid)) > 0) {
continue;
continue;
/* If we haven't killed this process yet, kill
* it */
} else if (ret == 0) {
if (sigcont)
ret = 1;
}
/* Something was processed in this pass, so schedule another one */
done = false;
if (r < 0) {
if (ret >= 0)
return r;
return ret;
}
}
/* A negative 'r' here is the read error that ended the inner loop */
if (r < 0) {
if (ret >= 0)
return r;
return ret;
}
/* To avoid racing against processes which fork
* quicker than we can kill them we repeat this until
* no new pids need to be killed. */
} while (!done);
return ret;
}
/* Recursive counterpart of cg_kill(): enumerates child groups with
 * cg_read_subgroup() and folds each step's result into 'ret', keeping
 * the first non-zero result as long as 'ret' is still non-negative.
 *
 * Returns 'ret', or a negative value / -ENOMEM on the visible early
 * exits.
 *
 * NOTE(review): this excerpt appears truncated. 'd' is used without a
 * visible declaration, 'r' is tested before any visible assignment,
 * and neither the recursive call nor the removal performed when 'rem'
 * is set is shown. Confirm against the complete file. */
int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
int r, ret = 0;
char *fn;
/* NOTE(review): presumably a fallback set is allocated between the
 * two identical checks below; that line is not visible here. */
if (!s) {
if (!s)
return -ENOMEM;
}
if (r < 0) {
return r;
return ret;
}
/* One iteration per child group name returned in 'fn' */
while ((r = cg_read_subgroup(d, &fn)) > 0) {
_cleanup_free_ char *p = NULL;
if (!p)
return -ENOMEM;
/* Remember the first non-zero result, but never overwrite an error */
if (ret >= 0 && r != 0)
ret = r;
}
/* A negative 'r' here is the enumeration error that ended the loop */
if (ret >= 0 && r < 0)
ret = r;
if (rem) {
return r;
}
return ret;
}
unsigned i;
/* This safely kills all processes; first it sends a SIGTERM,
* then checks 8 times after 200ms whether the group is now
* empty, then kills everything that is left with SIGKILL and
* finally checks 5 times after 200ms each whether the group
* is finally empty. */
for (i = 0; i < 15; i++) {
int sig, r;
if (i <= 0)
else if (i == 9)
else
sig = 0;
if (r <= 0)
return r;
}
return 0;
}
/* Moves processes from the group (cfrom, pfrom) to the group
 * (cto, pto) — presumably by attaching each pid that is read; the
 * attach call itself is not visible in this excerpt. Pids are pulled
 * with cg_read_pid() inside a do/while loop that runs again whenever
 * a pass cleared 'done'.
 *
 * Returns 'ret' (set to 1 once a pid was handled without error while
 * 'ret' was still 0), or a negative value / -ENOMEM on the visible
 * early exits.
 *
 * NOTE(review): this excerpt appears truncated. 's', 'f' and 'pid'
 * are used without a visible declaration and 'r' is tested before any
 * visible assignment. Confirm against the complete file. */
int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
bool done = false;
int r, ret = 0;
if (!s)
return -ENOMEM;
do {
/* Assume this is the last pass until the inner loop says otherwise */
done = true;
if (r < 0) {
return r;
return ret;
}
/* cg_read_pid() yields > 0 per pid read; the loop stops on 0 or error */
while ((r = cg_read_pid(f, &pid)) > 0) {
/* This might do weird stuff if we aren't a
* single-threaded program. However, we
* luckily know we are not */
continue;
continue;
if (r < 0) {
ret = r;
} else if (ret == 0)
ret = 1;
/* Something was processed in this pass, so schedule another one */
done = false;
if (r < 0) {
if (ret >= 0)
return r;
return ret;
}
}
/* A negative 'r' here is the read error that ended the inner loop */
if (r < 0) {
if (ret >= 0)
return r;
return ret;
}
} while (!done);
return ret;
}
/* Recursive counterpart of cg_migrate(): enumerates child groups with
 * cg_read_subgroup() and folds each step's result into 'ret', keeping
 * the first non-zero result as long as 'ret' is still non-negative.
 *
 * Returns 'ret', or a negative value / -ENOMEM on the visible early
 * exits.
 *
 * NOTE(review): this excerpt appears truncated. 'd' is used without a
 * visible declaration, 'r' is tested before any visible assignment,
 * and neither the recursive call nor the removal performed when 'rem'
 * is set is shown. Confirm against the complete file. */
int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self, bool rem) {
int r, ret = 0;
char *fn;
if (r < 0) {
return r;
return ret;
}
/* One iteration per child group name returned in 'fn' */
while ((r = cg_read_subgroup(d, &fn)) > 0) {
_cleanup_free_ char *p = NULL;
/* On allocation failure an earlier error in 'ret' wins over -ENOMEM */
if (!p) {
if (ret >= 0)
return -ENOMEM;
return ret;
}
/* Remember the first non-zero result, but never overwrite an error */
if (r != 0 && ret >= 0)
ret = r;
}
/* A negative 'r' here is the enumeration error that ended the loop */
if (r < 0 && ret >= 0)
ret = r;
if (rem) {
return r;
}
return ret;
}
/* Maps a controller name to its normalized form. The visible results
 * are the literal "systemd", 'controller' advanced by five characters,
 * or 'controller' unchanged. The result is never a fresh allocation:
 * it is either a string literal or points into the argument.
 *
 * NOTE(review): the conditions selecting between the three results are
 * not visible in this excerpt. */
static const char *normalize_controller(const char *controller) {
return "systemd";
return controller + 5;
else
return controller;
}
char *t = NULL;
if (controller) {
else if (path)
else if (suffix)
else
} else {
else if (path)
else
return -EINVAL;
}
if (!t)
return -ENOMEM;
*fs = t;
return 0;
}
const char *p;
return -EINVAL;
if (_unlikely_(!good)) {
int r;
r = path_is_mount_point("/sys/fs/cgroup", false);
if (r <= 0)
return r < 0 ? r : -ENOENT;
/* Cache this to save a few stat()s */
good = true;
}
}
/* Checks whether the controller named by 'p' exists (see the inline
 * comment). Returns 0 on success or -errno on failure; 'p' must not be
 * NULL.
 *
 * NOTE(review): the statements that build 'cc' and perform the actual
 * check are not visible in this excerpt. */
static int check_hierarchy(const char *p) {
char *cc;
assert(p);
/* Check if this controller actually really exists */
return -errno;
return 0;
}
/* Validates 'controller' (named controllers allowed) and verifies via
 * check_hierarchy() that it exists before a path is produced.
 *
 * Returns -EINVAL for an invalid controller name, or the negative
 * result of check_hierarchy().
 *
 * NOTE(review): the normalization that sets 'p' and the final step
 * that fills in '*fs' and returns are not visible in this excerpt. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
const char *p;
int r;
if (!cg_controller_is_valid(controller, true))
return -EINVAL;
/* Normalize the controller syntax */
/* Check if this controller actually really exists */
r = check_hierarchy(p);
if (r < 0)
return r;
}
char *p;
bool is_sticky;
return 0;
return 0;
if (!p) {
return 1;
}
is_sticky = file_is_priv_sticky(p) > 0;
free(p);
if (is_sticky)
return 0;
return 0;
}
int r = 0;
if (r < 0)
return r;
errno = 0;
if (delete_root) {
bool is_sticky;
char *p;
if (!p)
return -ENOMEM;
is_sticky = file_is_priv_sticky(p) > 0;
free(p);
if (!is_sticky)
return -errno;
}
return r;
}
int r;
if (r < 0)
return r;
return r == -ENOENT ? 0 : r;
}
int r;
if (r < 0)
return r;
if (pid == 0)
return write_string_file(fs, c);
}
/* Adjusts access settings of a cgroup. The only visible steps are
 * masking 'mode' down to the permission bits 0777 and propagating a
 * negative 'r'.
 *
 * NOTE(review): the parameter list is cut off after 'path' ('mode' is
 * used but not declared here), and the calls that set 'r' are not
 * visible in this excerpt. */
int cg_set_group_access(
const char *controller,
const char *path,
int r;
mode &= 0777;
if (r < 0)
return r;
}
/* Adjusts access settings of a cgroup's task file. Per the inline
 * comments, 'mode' and 'sticky' may be given together or individually,
 * and the same values are kept for "cgroup.procs" as for "tasks".
 * 'mode' is masked down to 0666.
 *
 * Returns 0 on the visible early exit, otherwise a negative value
 * taken from 'r' or -errno.
 *
 * NOTE(review): the parameter list is incomplete ('mode' is used but
 * not declared here), and the conditions and calls between the
 * comments are not visible in this excerpt. */
int cg_set_task_access(
const char *controller,
const char *path,
int sticky) {
int r;
return 0;
/* Only the read/write permission bits are kept */
mode &= 0666;
if (r < 0)
return r;
/* Both mode and sticky param are passed */
/* Only one param is passed, hence read the current
* mode from the file itself */
if (r < 0)
return -errno;
/* No mode set, we just shall set the sticky bit */
else
/* Only mode set, leave sticky bit untouched */
}
if (r < 0)
return r;
/* Always keep values for "cgroup.procs" in sync with "tasks" */
if (r < 0)
return r;
}
const char *fs;
if (controller) {
if (!cg_controller_is_valid(controller, true))
return -EINVAL;
} else
if (pid == 0)
else
if (!f)
char *l, *p, *w, *e;
size_t k;
char *state;
bool found = false;
if (!l)
continue;
l++;
e = strchr(l, ':');
if (!e)
continue;
*e = 0;
found = true;
break;
}
if (k == 5 + cs &&
found = true;
break;
}
}
if (!found)
continue;
p = strdup(e + 1);
if (!p)
return -ENOMEM;
*path = p;
return 0;
}
return -ENOENT;
}
char *sc;
int r;
if (r < 0)
return r;
if (r < 0)
return r;
if (sc[0] == 0) {
if (r < 0)
return r;
return -EEXIST;
if (r < 0)
return r;
if (r < 0)
return r;
if (r < 0)
return r;
return 1;
}
return -EIO;
return 0;
}
bool found = false;
int r;
if (r < 0)
return r == -ENOENT ? 1 : r;
while ((r = cg_read_pid(f, &pid)) > 0) {
continue;
found = true;
break;
}
if (r < 0)
return r;
return !found;
}
int r;
if (r < 0)
return r;
}
char *fn;
int r;
if (r <= 0)
return r;
if (r < 0)
return r == -ENOENT ? 1 : r;
while ((r = cg_read_subgroup(d, &fn)) > 0) {
_cleanup_free_ char *p = NULL;
if (!p)
return -ENOMEM;
if (r <= 0)
return r;
}
if (r < 0)
return r;
return 1;
}
const char *e;
_cleanup_free_ char *v = NULL;
if (*spec == '/') {
if (!path_is_safe(spec))
return -EINVAL;
if (path) {
if (!t)
return -ENOMEM;
*path = t;
}
if (controller)
*controller = NULL;
return 0;
}
if (!e) {
if (!cg_controller_is_valid(spec, true))
return -EINVAL;
if (controller) {
if (!t)
return -ENOMEM;
*controller = t;
}
if (path)
return 0;
}
if (!v)
return -ENOMEM;
t = strdup(normalize_controller(v));
if (!t)
return -ENOMEM;
if (!cg_controller_is_valid(t, true)) {
free(t);
return -EINVAL;
}
u = strdup(e+1);
if (!u) {
free(t);
return -ENOMEM;
}
if (!path_is_safe(u) ||
!path_is_absolute(u)) {
free(t);
free(u);
return -EINVAL;
}
if (controller)
*controller = t;
else
free(t);
if (path)
*path = u;
else
free(u);
return 0;
}
char *s;
if (!controller)
controller = "systemd";
else {
if (!cg_controller_is_valid(controller, true))
return -EINVAL;
}
if (!path_is_absolute(path))
return -EINVAL;
if (!s)
return -ENOMEM;
*spec = s;
return 0;
}
char *t;
int r;
/* First check if it already is a filesystem path */
if (!t)
return -ENOMEM;
*result = t;
return 0;
}
/* Otherwise treat it as cg spec */
r = cg_split_spec(path, &c, &p);
if (r < 0)
return r;
}
/* Stores in '*path' a newly allocated string that ends in "/system".
 * When the lookup result 'r' is negative the plain "/system" is used;
 * otherwise "/system" is appended to 'p' unless 'p' already ends with
 * it. The caller owns the returned string.
 *
 * Returns 0 on success or -ENOMEM if an allocation fails.
 *
 * NOTE(review): the call that sets 'r' and 'p' is not visible in this
 * excerpt. */
int cg_get_system_path(char **path) {
char *p;
int r;
/* Fallback when the lookup failed */
if (r < 0) {
p = strdup("/system");
if (!p)
return -ENOMEM;
}
if (endswith(p, "/system"))
*path = p;
else {
char *q;
/* 'p' is released whether or not the concatenation succeeded */
q = strappend(p, "/system");
free(p);
if (!q)
return -ENOMEM;
*path = q;
}
return 0;
}
/* Computes the root cgroup path by cutting 'root' at position 'e'.
 * If 'e' is the very start of the string, the first character is kept
 * and the string is terminated after it; otherwise the string is
 * terminated at 'e'.
 *
 * Returns 0 on success or the negative 'r' of the lookup.
 *
 * NOTE(review): the statements that obtain 'root', locate 'e' and
 * assign '*path' are not visible in this excerpt. */
int cg_get_root_path(char **path) {
char *root, *e;
int r;
if (r < 0)
return r;
/* Keep the leading character when the match is at offset 0 */
if (e == root)
e[1] = 0;
else if (e)
*e = 0;
return 0;
}
/* Stores in '*path' a newly allocated string naming the location for
 * user cgroups (see the inline comment); the plain "/user" is one of
 * the possible results. The caller owns the returned string.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 *
 * NOTE(review): the condition in front of the strdup() and the body
 * of the 'else' branch are not visible in this excerpt. */
int cg_get_user_path(char **path) {
char *p;
/* Figure out the place to put user cgroups below. We use the
* same as PID 1 has but with the "/system" suffix replaced by
* "/user" */
p = strdup("/user");
else
if (!p)
return -ENOMEM;
*path = p;
return 0;
}
char *p;
if (machine) {
if (!escaped)
return -ENOMEM;
}
if (!p)
return -ENOMEM;
*path = p;
return 0;
}
/* Filters the NULL-terminated string array 'controllers' in place.
 *
 * Each entry is normalized with normalize_controller() and dropped
 * (and free()d) when the normalized name
 *   - is "systemd",
 *   - is not a valid controller name (logged as a warning), or
 *   - fails check_hierarchy() (logged at debug level).
 * Surviving entries are compacted towards the front of the array,
 * which is then re-terminated and passed through strv_uniq().
 *
 * Returns the result of strv_uniq(), or 'controllers' unchanged if it
 * is NULL. */
char **cg_shorten_controllers(char **controllers) {
        char **src, **dst;

        if (!controllers)
                return controllers;

        dst = controllers;

        for (src = controllers; *src; src++) {
                const char *name = normalize_controller(*src);
                bool keep;

                /* 'name' points into *src, so every log call has to
                 * happen before the entry is released below. */
                if (streq(name, "systemd"))
                        keep = false;
                else if (!cg_controller_is_valid(name, true)) {
                        log_warning("Controller %s is not valid, removing from controllers list.", name);
                        keep = false;
                } else if (check_hierarchy(name) < 0) {
                        log_debug("Controller %s is not available, removing from controllers list.", name);
                        keep = false;
                } else
                        keep = true;

                if (keep)
                        *dst++ = *src;
                else
                        free(*src);
        }

        *dst = NULL;

        return strv_uniq(controllers);
}
char *cg_process, *p;
int r;
r = cg_get_root_path(&cg_root);
if (r < 0)
return r;
if (r < 0)
return r;
if (p)
p--;
else
p = cg_process;
if (cgroup) {
char* c;
c = strdup(p);
if (!c) {
return -ENOMEM;
}
*cgroup = c;
}
if (root) {
cg_process[p-cg_process] = 0;
*root = cg_process;
} else
return 0;
}
char *p, *e, *c, *s, *k;
c = cg_unescape(c);
/* Could this be a valid unit name? */
if (!unit_name_is_valid(c, true))
return -EINVAL;
if (!unit_name_is_template(c))
s = strdup(c);
else {
if (*e != '/')
return -EINVAL;
e += strspn(e, "/");
p = strchrnul(e, '/');
k = strndupa(e, p - e);
k = cg_unescape(k);
if (!unit_name_is_valid(k, false))
return -EINVAL;
s = strdup(k);
}
if (!s)
return -ENOMEM;
*unit = s;
return 0;
}
const char *e;
if (!e)
return -ENOENT;
return cg_path_decode_unit(e, unit);
}
int r;
if (r < 0)
return r;
}
/* Skips the current path component: finds the first '/' in 'e' and
 * returns a pointer to the first character after that run of slashes
 * (which may be the terminating NUL). Returns NULL if 'e' contains no
 * '/' at all. 'e' must not be NULL. */
_pure_ static const char *skip_label(const char *e) {
        const char *cursor;

        assert(e);

        cursor = strchr(e, '/');
        if (cursor == NULL)
                return NULL;

        /* Swallow consecutive separators, so "a//b" yields "b" */
        while (*cursor == '/')
                cursor++;

        return cursor;
}
const char *e;
/* We always have to parse the path from the beginning as unit
* cgroups might have arbitrary child cgroups and we shouldn't get
* confused by those */
if (!e)
return -ENOENT;
/* Skip the user name */
e = skip_label(e);
if (!e)
return -ENOENT;
/* Skip the session ID */
e = skip_label(e);
if (!e)
return -ENOENT;
/* Skip the systemd cgroup */
e = skip_label(e);
if (!e)
return -ENOENT;
return cg_path_decode_unit(e, unit);
}
int r;
if (r < 0)
return r;
}
const char *e, *n;
char *s, *r;
if (!e)
return -ENOENT;
n = strchrnul(e, '/');
if (e == n)
return -ENOENT;
s = strndupa(e, n - e);
r = strdup(cg_unescape(s));
if (!r)
return -ENOMEM;
*machine = r;
return 0;
}
int r;
if (r < 0)
return r;
}
const char *e, *n;
char *s;
if (!e)
return -ENOENT;
/* Skip the user name */
e = skip_label(e);
if (!e)
return -ENOENT;
n = strchrnul(e, '/');
if (n - e < 8)
return -ENOENT;
return -ENOENT;
s = strndup(e, n - e - 8);
if (!s)
return -ENOMEM;
*session = s;
return 0;
}
int r;
if (r < 0)
return r;
}
const char *e, *n;
char *s;
if (!e)
return -ENOENT;
n = strchrnul(e, '/');
if (n - e < 5)
return -ENOENT;
return -ENOENT;
s = strndupa(e, n - e - 5);
if (!s)
return -ENOMEM;
}
int r;
if (r < 0)
return r;
}
const char *dot;
char *c;
if (!filename_is_safe(attr))
return -EINVAL;
if (!dot) {
*controller = NULL;
return 0;
}
if (!c)
return -ENOMEM;
if (!cg_controller_is_valid(c, false)) {
free(c);
return -EINVAL;
}
*controller = c;
return 1;
}
/* Returns a newly allocated copy of 'p' that is safe to use as a file
 * name in the cgroup tree: the copy gets a "_" prefix when 'p' is
 * empty, starts with '_' or '.', equals "notify_on_release",
 * "release_agent" or "tasks", or is flagged by the additional checks
 * in the else branch. Otherwise it is a plain duplicate.
 *
 * The caller must free() the result. The return value is whatever
 * strappend()/strdup() return — presumably NULL on allocation
 * failure.
 *
 * NOTE(review): the statements that set 'dot' and 'n', and at least
 * one condition inside the else branch, are not visible in this
 * excerpt. */
char *cg_escape(const char *p) {
bool need_prefix = false;
/* This implements very minimal escaping for names to be used
* as file names in the cgroup tree: any name which might
* conflict with a kernel name or is prefixed with '_' is
* prefixed with a '_'. That way, when reading cgroup names it
* is sufficient to remove a single prefixing underscore if
* there is one. */
/* The return value of this function (unlike cg_unescape())
* needs free()! */
if (p[0] == 0 ||
p[0] == '_' ||
p[0] == '.' ||
streq(p, "notify_on_release") ||
streq(p, "release_agent") ||
streq(p, "tasks"))
need_prefix = true;
else {
const char *dot;
if (dot) {
need_prefix = true;
else {
char *n;
/* A name for which check_hierarchy() succeeds must be escaped too */
if (check_hierarchy(n) >= 0)
need_prefix = true;
}
}
}
if (need_prefix)
return strappend("_", p);
else
return strdup(p);
}
/* Reverses the "_" prefixing done by cg_escape(): skips one leading
 * underscore if there is one.
 *
 * The result aliases the input (it is either 'p' or 'p + 1'), so,
 * unlike the result of cg_escape(), it must NOT be free()d. 'p' must
 * not be NULL. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
/* The complete set of characters allowed in a controller name */
#define CONTROLLER_VALID                        \
        "0123456789"                            \
        "abcdefghijklmnopqrstuvwxyz"            \
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"            \
        "_"

/* Tells whether 'p' is an acceptable cgroup controller name.
 *
 * If 'allow_named' is set, a leading "name=" is accepted and skipped
 * before the checks. The remaining name must be non-empty, must not
 * start with '_', must consist only of CONTROLLER_VALID characters and
 * must not be longer than FILENAME_MAX.
 *
 * Returns false for a NULL 'p'. */
bool cg_controller_is_valid(const char *p, bool allow_named) {
        const char *name = p;
        size_t len;

        if (!name)
                return false;

        if (allow_named) {
                const char *rest = startswith(name, "name=");

                if (rest)
                        name = rest;
        }

        if (name[0] == 0 || name[0] == '_')
                return false;

        /* The valid prefix has to cover the whole string */
        len = strspn(name, CONTROLLER_VALID);
        if (name[len] != 0)
                return false;

        return len <= FILENAME_MAX;
}