lx_brand.c revision 07678296a66d425e65d218730836cc0162649ce4
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/machbrand.h>
#include <sys/lx_syscalls.h>
#include <sys/lx_futex.h>
#include <sys/lx_brand.h>
#include <sys/pathname.h>
#include <sys/privregs.h>
#include <sys/archsystm.h>
/* Debug flag for the lx brand; starts out cleared (0). */
int lx_debug = 0;
/* Forward declaration; the definition appears later in this file. */
void lx_setbrand(proc_t *);
/* Declared extern: the definitions are not part of this file. */
extern void lx_setrval(klwp_t *, int, int);
extern void lx_exec();
extern int lx_initlwp(klwp_t *);
extern void lx_exitlwp(klwp_t *);
extern void lx_freelwp(klwp_t *);
/*
* Nonzero while brand system call tracing is switched on. Set to 1 by
* lx_brand_systrace_enable(), reset to 0 by its counterpart, and tested by
* the B_SYSENTRY and B_SYSRETURN brand operations.
*/
static int lx_systrace_enabled;
/* lx brand */
};
/*
* brand_mach_ops vector for the lx brand; all four entries are left NULL.
*/
struct brand_mach_ops lx_mops = {
NULL,
NULL,
NULL,
NULL,
};
"lx",
&lx_brops,
};
};
/*
* Loadable-module linkage record.
* NOTE(review): no initializer entries are visible here -- confirm contents.
*/
static struct modlinkage modlinkage = {
};
/*
* Process-exit handling for the lx brand. The zone's init process is treated
* specially (see below); otherwise the process's brand data pointer is
* cleared and the process is switched back to the native brand.
*/
void
{
/*
* If init is dying and we aren't explicitly shutting down the zone
* or the system, then Solaris is about to restart init. The Linux
* init is not designed to handle a restart, which it interprets as
* a reboot. To give it a sane environment in which to run, we
* reboot the zone.
*/
if (p->p_pid == z->zone_proc_initpid) {
/*
* Preconditions checked here: no boot error recorded for the zone,
* restart-init requested for the zone, and the zone not yet in (or
* past) the shutting-down state.
*/
if (z->zone_boot_err == 0 &&
z->zone_restart_init &&
zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
} else {
/* Drop the brand data pointer and revert to the native brand. */
p->p_brand_data = NULL;
p->p_brand = &native_brand;
}
}
/*
* Per-process brand setup routine for the lx brand (forward-declared near
* the top of this file). Takes the process to operate on and returns
* nothing; see the restriction on callers described in the body.
*/
void
lx_setbrand(proc_t *p)
{
/* NOTE(review): no use of err is visible in this view -- confirm. */
int err;
/*
* This routine can only be called for single-threaded processes.
* Since lx_initlwp() can only fail if we run out of PIDs for
* multithreaded processes, we know that this can never fail.
*/
}
/*
* Brand attribute handler. Only LX_ATTR_RESTART_INIT is recognised; within
* that case the visible exits are ERANGE, EFAULT, EINVAL and 0 (success).
* Any other attribute value is rejected with EINVAL.
*/
/* ARGSUSED */
int
{
if (attr == LX_ATTR_RESTART_INIT) {
return (ERANGE);
return (EFAULT);
return (EINVAL);
return (0);
}
/* Unrecognised attribute. */
return (EINVAL);
}
/*
* Brand attribute handler. Only LX_ATTR_RESTART_INIT is recognised; for it
* the visible exits are ERANGE, EFAULT (a boolean_t-sized transfer failed)
* and 0 on success. Any other attribute value is rejected with EINVAL.
*
* Every error return is a positive errno value, consistent with the other
* attribute handler in this file.
*/
/* ARGSUSED */
int
{
if (attr == LX_ATTR_RESTART_INIT) {
return (ERANGE);
sizeof (boolean_t)) != 0)
return (EFAULT);
return (0);
}
/*
* Unrecognised attribute. This used to return -EINVAL, which is not a
* valid errno value and disagreed with every other error exit here.
*/
return (EINVAL);
}
/*
* Enable ptrace system call tracing for the given LWP. This is done by
* both setting the flag in that LWP's brand data (in the kernel) and setting
* the process-wide trace flag (in the brand library of the traced process).
*
* The failure exits visible here are ESRCH and EPERM, plus whatever nonzero
* value ends up in ret while the trace flag is being set. The target process
* is released with sprunlock() on every exit path that follows its lookup.
*/
static int
{
proc_t *p;
kthread_t *t;
return (ESRCH);
sprunlock(p);
return (EPERM);
}
sprunlock(p);
return (ESRCH);
}
sprunlock(p);
return (ESRCH);
}
if (set) {
/*
* Enable the ptrace flag for this LWP and this process. Note
* that we will turn off the LWP's ptrace flag, but we don't
* turn off the process's ptrace flag.
*/
/* p_lock is dropped around the next step and re-taken afterwards. */
mutex_exit(&p->p_lock);
/*
* This can fail only in some rare corner cases where the
* process is exiting or we're completely out of memory. In
* these cases, it's sufficient to return an error to the ptrace
* consumer and leave the process-wide flag set.
*/
mutex_enter(&p->p_lock);
/*
* If we couldn't set the trace flag, unset the LWP's ptrace
* flag as the ptrace consumer won't expect this LWP to stop.
*/
if (ret != 0)
} else {
/* Nothing can fail when tracing is being turned off. */
ret = 0;
}
sprunlock(p);
if (ret != 0)
return (ret);
}
/*
* One-shot ptrace notification helper: takes no arguments and returns
* nothing. The per-LWP flag it consumes is described in the body.
*/
static void
lx_ptrace_fire(void)
{
/*
* The ptrace flag only applies until the next event is encountered
* for the given LWP. If it's set, turn off the flag and poke the
* controlling process by raising a signal.
*/
}
}
/*
* Switch brand system call tracing on by setting lx_systrace_enabled, the
* flag that the B_SYSENTRY and B_SYSRETURN brand operations test.
*/
void
lx_brand_systrace_enable(void)
{
/*
* Declared locally rather than in a header.
* NOTE(review): presumably also invoked from here -- confirm.
*/
extern void lx_brand_int80_enable(void);
lx_systrace_enabled = 1;
}
/*
* Counterpart of lx_brand_systrace_enable(): clears lx_systrace_enabled so
* that the B_SYSENTRY and B_SYSRETURN brand operations see tracing as off.
*/
void
{
/*
* Declared locally rather than in a header.
* NOTE(review): presumably also invoked from here -- confirm.
*/
extern void lx_brand_int80_disable(void);
lx_systrace_enabled = 0;
}
/*
* Get the addresses of the user-space system call handler and attach it to
* the proc structure. Returning 0 indicates success; the value returned
* by the system call is the value stored in rval. Returning a non-zero
* value indicates a failure; the value returned is used to set errno, -1
* is returned from the syscall and the contents of rval are ignored. To
* set errno and have the syscall return a value other than -1 we can
* manually set errno and rval and return 0.
*
* The operation is selected by cmd. Commands that reach the end of the
* function without being handled are rejected with EINVAL.
*/
int
{
int linux_call;
int error;
/*
* There is one operation that is supported for non-branded
* processes: B_EXEC_BRAND. This is the equivalent of an
* exec call, but the new process that is created will be
* a branded process.
*/
if (cmd == B_EXEC_BRAND) {
return (exec_common(
EBA_BRAND));
}
/* For all other operations this must be a branded process. */
switch (cmd) {
case B_REGISTER:
/*
* Brand library registration. A registration record that cannot be
* copied in yields EFAULT (for either data model), and an unacceptable
* library version yields EINVAL. Success stores 0 in *rval.
*/
if (p->p_model == DATAMODEL_NATIVE) {
lx_print("Failed to copyin brand registration "
"at 0x%p\n", (void *)arg1);
return (EFAULT);
}
#ifdef _LP64
} else {
lx_print("Failed to copyin brand registration "
"at 0x%p\n", (void *)arg1);
return (EFAULT);
}
#endif
}
lx_print("Invalid brand library version (%u)\n",
return (EINVAL);
}
lx_print("Assigning brand 0x%p and handler 0x%p to proc 0x%p\n",
pd = p->p_brand_data;
*rval = 0;
return (0);
case B_TTYMODES:
/* This is necessary for emulating TCGETS ioctls. */
/* Visible failure exits: EIO (lookup != DDI_SUCCESS) and EFAULT. */
&termios_len) != DDI_SUCCESS)
return (EIO);
return (EFAULT);
}
*rval = 0;
return (0);
case B_ELFDATA:
/*
* Transfers an lx_elf_data_t-sized record. On failure both *rval and
* the return value are set to -1.
*/
sizeof (lx_elf_data_t)) != 0) {
return (*rval = -1);
}
*rval = 0;
return (0);
case B_EXEC_NATIVE:
/*
* Runs exec_common(). *rval and the return value are both -1 if it
* reports an error, and both 0 otherwise.
*/
error = exec_common(
if (error) {
return (*rval = -1);
}
return (*rval = 0);
case B_LPID_TO_SPAIR:
/*
* Given a Linux pid as arg1, return the Solaris pid in arg2 and
* the Solaris LWP in arg3. We also translate pid 1 (which is
* hardcoded in many applications) to the zone's init process.
*/
{
if (s_pid == -1)
s_pid = 1;
s_tid = 1;
/* ESRCH when the lookup fails; EFAULT when a result cannot be stored. */
&s_tid) < 0)
return (ESRCH);
sizeof (s_pid)) != 0 ||
return (EFAULT);
*rval = 0;
return (0);
}
case B_PTRACE_SYSCALL:
(int)arg3);
return (0);
case B_SYSENTRY:
if (lx_systrace_enabled) {
return (EFAULT);
}
pd = p->p_brand_data;
/*
* If neither DTrace nor ptrace are interested in tracing
* this process any more, turn off the trace flag.
*/
*rval = 0;
return (0);
case B_SYSRETURN:
if (lx_systrace_enabled) {
}
pd = p->p_brand_data;
/*
* If neither DTrace nor ptrace are interested in tracing
* this process any more, turn off the trace flag.
*/
*rval = 0;
return (0);
case B_SET_AFFINITY_MASK:
case B_GET_AFFINITY_MASK:
/*
* Retrieve or store the CPU affinity mask for the
* requested linux pid.
*
* arg1 is a linux PID (0 means curthread).
* arg2 is the size of the given mask.
* arg3 is the address of the affinity mask.
*/
default:
return (0);
}
}
/* No handler claimed this command. */
return (EINVAL);
}
/*
* Copy the per-process brand data from a parent proc to a child.
* Returns nothing.
*
* NOTE(review): the body is empty in this view, so no copy is visibly
* performed here -- confirm against the full source.
*/
void
{
}
/*
* Currently, only 32-bit branded ELF executables are supported.
* On a 64-bit (_LP64) build the generic mapexec_brand name is therefore
* redirected to mapexec32_brand; otherwise the name is left untouched.
*/
#if defined(_LP64)
#define mapexec_brand mapexec32_brand
#endif /* _LP64 */
/*
* Exec routine called by elfexec() to load 32-bit Linux binaries.
*
* Returns 0 on success. A failure in any of the intermediate steps is
* passed back unchanged in error, and a failed transfer of the phdr_auxv32
* array is reported as EFAULT.
*/
static int
{
int error;
{ AT_SUN_BRAND_LX_PHDR, 0 },
{ AT_SUN_BRAND_AUX2, 0 },
{ AT_SUN_BRAND_AUX3, 0 }
};
int interp;
int i;
lx_elf_data_t *edp =
/*
* Set the brandname and library name for the new process so that
* elfexec() puts them onto the stack.
*/
/*
* We will exec the brand library, and map in the linux linker and the
* linux executable.
*/
&nvp)) {
return (error);
}
return (error);
}
return (error);
/*
* Save off the important properties of the lx executable. The brand
* library will ask us for this data later, when it is ready to set
* things up for the lx executable.
*/
if (interp) {
/*
* This is a shared object executable, so we need to
* pick a reasonable place to put the heap. Just don't
* use the first page.
*/
}
/*
* If the program needs an interpreter (most do), map it in and
* store relevant information about it in the aux vector, where
* the brand library can find it.
*/
&nvp)) {
return (error);
}
return (error);
}
/*
* Now that we know the base address of the brand's linker,
* place it in the aux vector.
*/
} else {
/*
* This program has no interpreter. The lx brand library will
* jump to the address in the AT_SUN_BRAND_LDENTRY aux vector,
* so in this case, put the entry point of the main executable
* there.
*/
/*
* An executable with no interpreter, this must be a
* statically linked executable, which means we loaded
* it at the address specified in the elf header, in
* which case the e_entry field of the elf header is an
* absolute address.
*/
} else {
/*
* A shared object with no interpreter, we use the
* calculated address from above.
*/
}
/*
* Delay setting the brkbase until the first call to brk();
* see elfexec() for details.
*/
env.ex_brksize = 0;
}
/* Install the exec environment assembled in env. */
setexecenv(&env);
/*
* We don't need to copy this stuff out. It is only used by our
* tools to locate the lx linker's debug section. But we should at
* least try to keep /proc's view of the aux vector consistent with
* what's on the process stack.
*/
sizeof (phdr_auxv32)) == -1)
return (EFAULT);
/*
* /proc uses the AT_ENTRY aux vector entry to deduce
* the location of the executable in the address space. The user
* structure contains a copy of the aux vector that needs to have those
* entries patched with the values of the real lx executable (they
* currently contain the values from the lx brand library that was
* elfexec'd, above).
*
* For live processes, AT_BASE is used to locate the linker segment,
* which /proc and friends will later use to find Solaris symbols
* (such as rtld_db_preinit). However, for core files, /proc uses
* AT_ENTRY to find the right segment to label as the executable.
* So we set AT_ENTRY to be the entry point of the linux executable,
* but leave AT_BASE to be the address of the Solaris linker.
*/
/* Visit each of the __KERN_NAUXV_IMPL slots of that aux vector copy. */
for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
}
return (0);
}
/*
* Module initialisation entry point for the lx brand. Sets up the lx pid
* state with lx_pid_init() and returns err: 0 on success, nonzero if a
* later step failed. On failure the futex state is torn down again, and the
* module panics if even that cannot be done.
*/
int
_init(void)
{
int err = 0;
lx_pid_init();
/* for lx_futex() */
if (err != 0) {
/*
* This looks drastic, but it should never happen. These
* two data structures should be completely free-able until
* they are used by Linux processes. Since the brand
* wasn't loaded there should be no Linux processes, and
* thus no way for these data structures to be modified.
*/
if (lx_futex_fini())
panic("lx brand module cannot be loaded or unloaded.");
}
return (err);
}
/*
* NOTE(review): neither the identifier nor the body of this int-returning
* function is visible here; it sits between _init() and _fini(), so it is
* presumably the module information entry point -- confirm.
*/
int
{
}
/*
* Module teardown entry point for the lx brand.
*
* Refuses with EBUSY while any zone still uses the lx brand. Otherwise the
* lx pid state and then the futex state are torn down; futex_done records
* that the futex teardown succeeded. If teardown fails, lx_pid_init() is
* run again so the module is left usable. Returns 0 on success or the
* error that stopped the unload.
*/
int
_fini(void)
{
int err;
int futex_done = 0;
/*
* If there are any zones using this brand, we can't allow it to be
* unloaded.
*/
if (brand_zone_count(&lx_brand))
return (EBUSY);
lx_pid_fini();
if ((err = lx_futex_fini()) != 0)
goto done;
futex_done = 1;
done:
if (err) {
/*
* If we can't unload the module, then we have to get it
* back into a sane state.
*/
lx_pid_init();
if (futex_done)
}
return (err);
}