sched.c revision 9acbbeaf2a1ffe5c14b244867d427714fab43c5c
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
#include <sys/cred_impl.h>
#include <sys/ucred.h>
#include <ucred.h>
#include <stdlib.h>
#include <signal.h>
#include <errno.h>
#include <sched.h>
#include <strings.h>
#include <pthread.h>
#include <time.h>
#include <thread.h>
#include <alloca.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/lx_syscall.h>
#include <sys/lx_debug.h>
#include <sys/lx_brand.h>
#include <sys/lx_misc.h>
#include <sys/lx_sched.h>
/* Linux only has three valid policies, SCHED_FIFO, SCHED_RR and SCHED_OTHER */
static int
validate_policy(int policy)
{
switch (policy) {
case LX_SCHED_FIFO:
return (SCHED_FIFO);
case LX_SCHED_RR:
return (SCHED_RR);
case LX_SCHED_OTHER:
return (SCHED_OTHER);
default:
lx_debug("validate_policy: illegal policy: %d", policy);
return (-EINVAL);
}
}
/*
* Check to see if we have the permissions to set scheduler parameters and
* policy, based on Linux' demand that such commands fail with errno set to
* EPERM if the current euid is not the euid or ruid of the process in
* question.
*/
static int
check_schedperms(pid_t pid)
{
size_t sz;
ucred_t *cr;
uid_t euid;
euid = geteuid();
if (pid == getpid()) {
/*
* If we're the process to be checked, simply check the euid
* against our ruid.
*/
if (euid != getuid())
return (-EPERM);
return (0);
}
/*
* We allocate a ucred_t ourselves rather than call ucred_get(3C)
* because ucred_get() calls malloc(3C), which the brand library cannot
* use. Because we allocate the space with SAFE_ALLOCA(), there's
* no need to free it when we're done.
*/
sz = ucred_size();
cr = (ucred_t *)SAFE_ALLOCA(sz);
if (cr == NULL)
return (-ENOMEM);
/*
* If we can't access the process' credentials, fail with errno EPERM
* as the call would not have succeeded anyway.
*/
if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, pid, cr) != 0)
return ((errno == EACCES) ? -EPERM : -errno);
if ((euid != ucred_geteuid(cr)) && (euid != ucred_getruid(cr)))
return (-EPERM);
return (0);
}
static int
ltos_sparam(int policy, struct lx_sched_param *lsp, struct sched_param *sp)
{
struct lx_sched_param ls;
int smin = sched_get_priority_min(policy);
int smax = sched_get_priority_max(policy);
if (uucopy(lsp, &ls, sizeof (struct lx_sched_param)) != 0)
return (-errno);
bzero(sp, sizeof (struct sched_param));
/*
* Linux has a fixed priority range, 0 - 99, which we need to convert to
* Solaris's dynamic range. Linux considers lower numbers to be
* higher priority, so we'll invert the priority within Solaris's range.
*
* The formula to convert between ranges is:
*
* L * (smax - smin)
* S = ----------------- + smin
* (lmax - lmin)
*
* where S is the Solaris equivalent of the linux priority L.
*
* To invert the priority, we use:
* S' = smax - S + smin
*
* Together, these two formulas become:
*
* L * (smax - smin)
* S = smax - ----------------- + 2smin
* 99
*/
sp->sched_priority = smax -
((ls.lx_sched_prio * (smax - smin)) / LX_PRI_MAX) + 2*smin;
lx_debug("ltos_sparam: linux prio %d = Solaris prio %d "
"(Solaris range %d,%d)\n", ls.lx_sched_prio, sp->sched_priority,
smin, smax);
return (0);
}
static int
stol_sparam(int policy, struct sched_param *sp, struct lx_sched_param *lsp)
{
struct lx_sched_param ls;
int smin = sched_get_priority_min(policy);
int smax = sched_get_priority_max(policy);
if (policy == SCHED_OTHER) {
/*
* In Linux, the only valid SCHED_OTHER scheduler priority is 0
*/
ls.lx_sched_prio = 0;
} else {
/*
* Convert Solaris's dynamic, inverted priority range to the
* fixed Linux range of 1 - 99.
*
* The formula is (see above):
*
* (smax - s + 2smin) * 99
* l = -----------------------
* smax - smin
*/
ls.lx_sched_prio = ((smax - sp->sched_priority + 2*smin) *
LX_PRI_MAX) / (smax - smin);
}
lx_debug("stol_sparam: Solaris prio %d = linux prio %d "
"(Solaris range %d,%d)\n", sp->sched_priority, ls.lx_sched_prio,
smin, smax);
return ((uucopy(&ls, lsp, sizeof (struct lx_sched_param)) != 0)
? -errno : 0);
}
#define BITINDEX(ind) (ind / (sizeof (ulong_t) * 8))
#define BITSHIFT(ind) (1 << (ind % (sizeof (ulong_t) * 8)))
/* ARGSUSED */
int
lx_sched_getaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp)
{
int sz;
ulong_t *lmask, *zmask;
int i;
sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, pid, len, maskp);
if (sz == -1)
return (-errno);
/*
* If the target LWP hasn't ever had an affinity mask set, the kernel
* will return a mask of all 0's. If that is the case we must build a
* default mask that has all valid bits turned on.
*/
lmask = SAFE_ALLOCA(sz);
zmask = SAFE_ALLOCA(sz);
if (lmask == NULL || zmask == NULL)
return (-ENOMEM);
bzero(zmask, sz);
if (uucopy((void *)maskp, lmask, sz) != 0)
return (-EFAULT);
if (bcmp(lmask, zmask, sz) != 0)
return (sz);
for (i = 0; i < sz * 8; i++) {
if (p_online(i, P_STATUS) != -1) {
lmask[BITINDEX(i)] |= BITSHIFT(i);
}
}
if (uucopy(lmask, (void *)maskp, sz) != 0)
return (-EFAULT);
return (sz);
}
/* ARGSUSED */
int
lx_sched_setaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp)
{
int ret;
int sz;
int i;
int found;
ulong_t *lmask;
pid_t s_pid;
lwpid_t s_tid;
processorid_t cpuid;
if ((pid_t)pid < 0)
return (-EINVAL);
if (lx_lpid_to_spair(pid, &s_pid, &s_tid) < 0)
return (-ESRCH);
/*
* We only support setting affinity masks for threads in
* the calling process.
*/
if (s_pid != getpid())
return (-EPERM);
/*
* First, get the minimum bitmask size from the kernel.
*/
sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, 0, 0, 0);
if (sz == -1)
return (-errno);
lmask = SAFE_ALLOCA(sz);
if (lmask == NULL)
return (-ENOMEM);
if (uucopy((void *)maskp, lmask, sz) != 0)
return (-EFAULT);
/*
* Make sure the mask contains at least one processor that is
* physically on the system. Reduce the user's mask to the set of
* physically present CPUs. Keep track of how many valid
* bits are set in the user's mask.
*/
for (found = 0, i = 0; i < sz * 8; i++) {
if (p_online(i, P_STATUS) == -1) {
/*
* This CPU doesn't exist, so clear this bit from
* the user's mask.
*/
lmask[BITINDEX(i)] &= ~BITSHIFT(i);
continue;
}
if ((lmask[BITINDEX(i)] & BITSHIFT(i)) == BITSHIFT(i)) {
found++;
cpuid = i;
}
}
if (found == 0) {
lx_debug("\tlx_sched_setaffinity: mask has no present CPUs\n");
return (-EINVAL);
}
/*
* If only one bit is set, bind the thread to that procesor;
* otherwise, clear the binding.
*/
if (found == 1) {
lx_debug("\tlx_sched_setaffinity: binding thread %d to cpu%d\n",
s_tid, cpuid);
if (processor_bind(P_LWPID, s_tid, cpuid, NULL) != 0)
/*
* It could be that the requested processor is offline,
* so we'll just abandon our good-natured attempt to
* bind to it.
*/
lx_debug("couldn't bind LWP %d to cpu %d: %s\n", s_tid,
cpuid, strerror(errno));
} else {
lx_debug("\tlx_sched_setaffinity: clearing thr %d binding\n",
s_tid);
if (processor_bind(P_LWPID, s_tid, PBIND_NONE, NULL) != 0) {
lx_debug("couldn't clear CPU binding for LWP %d: %s\n",
s_tid, strerror(errno));
}
}
/*
* Finally, ask the kernel to make a note of our current (though fairly
* meaningless) affinity mask.
*/
ret = syscall(SYS_brand, B_SET_AFFINITY_MASK, pid, sz, lmask);
return ((ret == 0) ? 0 : -errno);
}
int
lx_sched_getparam(uintptr_t pid, uintptr_t param)
{
int policy, ret;
pid_t s_pid;
lwpid_t s_tid;
struct sched_param sp;
if (((pid_t)pid < 0) || (param == NULL))
return (-EINVAL);
if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
return (-ESRCH);
/*
* If we're attempting to get information on our own process, we can
* get data on a per-thread basis; if not, punt and use the specified
* pid.
*/
if (s_pid == getpid()) {
if ((ret = pthread_getschedparam(s_tid, &policy, &sp)) != 0)
return (-ret);
} else {
if (sched_getparam(s_pid, &sp) == -1)
return (-errno);
if ((policy = sched_getscheduler(s_pid)) < 0)
return (-errno);
}
return (stol_sparam(policy, &sp, (struct lx_sched_param *)param));
}
int
lx_sched_setparam(uintptr_t pid, uintptr_t param)
{
int err, policy;
pid_t s_pid;
lwpid_t s_tid;
struct lx_sched_param lp;
struct sched_param sp;
if (((pid_t)pid < 0) || (param == NULL))
return (-EINVAL);
if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
return (-ESRCH);
if (s_pid == getpid()) {
struct sched_param dummy;
if ((err = pthread_getschedparam(s_tid, &policy, &dummy)) != 0)
return (-err);
} else
if ((policy = sched_getscheduler(s_pid)) < 0)
return (-errno);
lx_debug("sched_setparam(): current policy %d", policy);
if (uucopy((void *)param, &lp, sizeof (lp)) != 0)
return (-errno);
/*
* In Linux, the only valid SCHED_OTHER scheduler priority is 0
*/
if ((policy == SCHED_OTHER) && (lp.lx_sched_prio != 0))
return (-EINVAL);
if ((err = ltos_sparam(policy, (struct lx_sched_param *)&lp,
&sp)) != 0)
return (err);
/*
* Check if we're allowed to change the scheduler for the process.
*
* If we're operating on a thread, we can't just call
* pthread_setschedparam() because as all threads reside within a
* single Solaris process, Solaris will allow the modification
*
* If we're operating on a process, we can't just call sched_setparam()
* because Solaris will allow the call to succeed if the scheduler
* parameters do not differ from those being installed, but Linux wants
* the call to fail.
*/
if ((err = check_schedperms(s_pid)) != 0)
return (err);
if (s_pid == getpid())
return (((err = pthread_setschedparam(s_tid, policy, &sp)) != 0)
? -err : 0);
return ((sched_setparam(s_pid, &sp) == -1) ? -errno : 0);
}
int
lx_sched_rr_get_interval(uintptr_t pid, uintptr_t timespec)
{
struct timespec ts;
pid_t s_pid;
if ((pid_t)pid < 0)
return (-EINVAL);
if (lx_lpid_to_spid((pid_t)pid, &s_pid) < 0)
return (-ESRCH);
if (uucopy((struct timespec *)timespec, &ts,
sizeof (struct timespec)) != 0)
return (-errno);
return ((sched_rr_get_interval(s_pid, &ts) == -1) ? -errno : 0);
}
int
lx_sched_getscheduler(uintptr_t pid)
{
int policy, rv;
pid_t s_pid;
lwpid_t s_tid;
if ((pid_t)pid < 0)
return (-EINVAL);
if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
return (-ESRCH);
if (s_pid == getpid()) {
struct sched_param dummy;
if ((rv = pthread_getschedparam(s_tid, &policy, &dummy)) != 0)
return (-rv);
} else
if ((policy = sched_getscheduler(s_pid)) < 0)
return (-errno);
/*
* Linux only supports certain policies; avoid confusing apps with
* alien policies.
*/
switch (policy) {
case SCHED_FIFO:
return (LX_SCHED_FIFO);
case SCHED_OTHER:
return (LX_SCHED_OTHER);
case SCHED_RR:
return (LX_SCHED_RR);
default:
break;
}
return (LX_SCHED_OTHER);
}
int
lx_sched_setscheduler(uintptr_t pid, uintptr_t policy, uintptr_t param)
{
int rt_pol;
int rv;
pid_t s_pid;
lwpid_t s_tid;
struct lx_sched_param lp;
struct sched_param sp;
if (((pid_t)pid < 0) || (param == NULL))
return (-EINVAL);
if ((rt_pol = validate_policy((int)policy)) < 0)
return (rt_pol);
if ((rv = ltos_sparam(policy, (struct lx_sched_param *)param,
&sp)) != 0)
return (rv);
if (uucopy((void *)param, &lp, sizeof (lp)) != 0)
return (-errno);
/*
* In Linux, the only valid SCHED_OTHER scheduler priority is 0
*/
if ((rt_pol == LX_SCHED_OTHER) && (lp.lx_sched_prio != 0))
return (-EINVAL);
if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
return (-ESRCH);
/*
* Check if we're allowed to change the scheduler for the process.
*
* If we're operating on a thread, we can't just call
* pthread_setschedparam() because as all threads reside within a
* single Solaris process, Solaris will allow the modification.
*
* If we're operating on a process, we can't just call
* sched_setscheduler() because Solaris will allow the call to succeed
* if the scheduler and scheduler parameters do not differ from those
* being installed, but Linux wants the call to fail.
*/
if ((rv = check_schedperms(s_pid)) != 0)
return (rv);
if (s_pid == getpid()) {
struct sched_param param;
int pol;
if ((pol = sched_getscheduler(s_pid)) != 0)
return (-errno);
/*
* sched_setscheduler() returns the previous scheduling policy
* on success, so call pthread_getschedparam() to get the
* current thread's scheduling policy and return that if the
* call to pthread_setschedparam() succeeds.
*/
if ((rv = pthread_getschedparam(s_tid, &pol, &param)) != 0)
return (-rv);
return (((rv = pthread_setschedparam(s_tid, rt_pol, &sp)) != 0)
? -rv : pol);
}
return (((rv = sched_setscheduler(s_pid, rt_pol, &sp)) == -1)
? -errno : rv);
}
int
lx_sched_get_priority_min(uintptr_t policy)
{
/*
* In Linux, the only valid SCHED_OTHER scheduler priority is 0.
* Linux scheduling priorities are not alterable, so there is no
* Solaris translation necessary.
*/
switch (policy) {
case LX_SCHED_FIFO:
case LX_SCHED_RR:
return (LX_SCHED_PRIORITY_MIN_RRFIFO);
case LX_SCHED_OTHER:
return (LX_SCHED_PRIORITY_MIN_OTHER);
default:
break;
}
return (-EINVAL);
}
int
lx_sched_get_priority_max(uintptr_t policy)
{
/*
* In Linux, the only valid SCHED_OTHER scheduler priority is 0
* Linux scheduling priorities are not alterable, so there is no
* Solaris translation necessary.
*/
switch (policy) {
case LX_SCHED_FIFO:
case LX_SCHED_RR:
return (LX_SCHED_PRIORITY_MAX_RRFIFO);
case LX_SCHED_OTHER:
return (LX_SCHED_PRIORITY_MAX_OTHER);
default:
break;
}
return (-EINVAL);
}