lx_brand.c revision 319378d99bca1eacf64f0a464b0175bf66b422ab
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/inttypes.h>
#include <assert.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
#include <errno.h>
#include <syslog.h>
#include <signal.h>
#include <fcntl.h>
#include <synch.h>
#include <libelf.h>
#include <libgen.h>
#include <pthread.h>
#include <utime.h>
#include <dirent.h>
#include <ucontext.h>
#include <libintl.h>
#include <locale.h>
#include <sys/lx_debug.h>
#include <sys/lx_brand.h>
#include <sys/lx_types.h>
#include <sys/lx_statfs.h>
#include <sys/lx_ioctl.h>
#include <sys/lx_signal.h>
#include <sys/lx_syscall.h>
#include <sys/lx_thread.h>
#include <sys/lx_thunk_server.h>
/*
 * Map solaris errno to the linux equivalent.
 *
 * The table is indexed by the Solaris errno value; the entry found at that
 * index is the error number reported to the Linux application.  Callers are
 * responsible for making sure the index is within the table, e.g. by
 * comparing against sizeof (stol_errno) / sizeof (stol_errno[0]).
 *
 * Many slots hold 22.  NOTE(review): 22 is presumably Linux EINVAL, used as
 * the catch-all for Solaris errors that have no Linux counterpart -- confirm.
 *
 * The table is never written to, so it is declared const to keep it in
 * read-only storage and to catch accidental stores at compile time.
 *
 * The trailing comment on each row is the index of the last entry in the row.
 */
static const int stol_errno[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,			/* 9 */
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,		/* 19 */
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,		/* 29 */
	30, 31, 32, 33, 34, 42, 43, 44, 45, 46,		/* 39 */
	47, 48, 49, 50, 51, 35, 47, 22, 38, 22,		/* 49 */
	52, 53, 54, 55, 56, 57, 58, 59, 22, 22,		/* 59 */
	61, 61, 62, 63, 64, 65, 66, 67, 68, 69,		/* 69 */
	70, 71, 22, 22, 72, 22, 22, 74, 36, 75,		/* 79 */
	76, 77, 78, 79, 80, 81, 82, 83, 84, 38,		/* 89 */
	40, 85, 86, 39, 87, 88, 89, 90, 91, 92,		/* 99 */
	22, 22, 22, 22, 22, 22, 22, 22, 22, 22,		/* 109 */
	22, 22, 22, 22, 22, 22, 22, 22, 22, 22,		/* 119 */
	93, 94, 95, 96, 97, 98, 99, 100, 101, 102,	/* 129 */
	103, 104, 105, 106, 107, 22, 22, 22, 22, 22,	/* 139 */
	22, 22, 22, 108, 109, 110, 111, 112, 113, 114,	/* 149 */
	115, 116					/* 151 */
};
char lx_release[128];
/*
 * Map a linux locale ending string to the solaris equivalent.
 *
 * One entry describes a single suffix rewrite: the ending as it appears on
 * the Linux locale name, the ending to substitute for it, and the length of
 * each of the two strings.
 *
 * NOTE(review): the two size fields are presumably precomputed string
 * lengths kept in step with the strings by whoever fills in lx_locales[] --
 * confirm against the table initializer.
 */
struct lx_locale_ending {
const char *linux_end; /* linux ending string */
const char *solaris_end; /* to transform with this string */
int le_size; /* linux ending string length */
int se_size; /* solaris ending string length */
};
static struct lx_locale_ending lx_locales[] = {
};
#define MAXLOCALENAMELEN 30
#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
#endif
/*
* This flag is part of the registration with the in-kernel brand module. It's
* used in lx_handler() to determine if we should go back into the kernel after
* a system call in case the kernel needs to perform some post-syscall work
* like tracing for example.
*/
int lx_traceflag;
/*
 * Reason codes for Linux system calls that are not emulated.
 *
 * NOTE(review): these are presumably used to pick the matching explanation
 * out of nosys_msgs[] below; the lookup itself is not visible here -- confirm
 * how the code is turned into an index.
 */
#define NOSYS_NULL 1
#define NOSYS_NO_EQUIV 2
#define NOSYS_KERNEL 3
#define NOSYS_UNDOC 4
#define NOSYS_OBSOLETE 5
/*
 * SYS_PASSTHRU denotes a system call we can just call on behalf of the
 * branded process without having to translate the arguments.
 *
 * The restriction on this is that the call in question MUST return -1 to
 * denote an error.
 *
 * NOTE(review): SYS_PASSTHRU has the same numeric value (5) as
 * NOSYS_OBSOLETE above.  That is only harmless if the two constants are
 * never stored in, or compared against, the same field -- confirm.
 */
#define SYS_PASSTHRU 5
/*
 * Human-readable explanations of why a system call is unsupported.
 *
 * NOTE(review): this table holds four strings while the NOSYS_* codes run
 * from 1 to 5, so the codes cannot all map one-to-one onto entries by direct
 * indexing -- verify the table against the NOSYS_* definitions.
 */
static char *nosys_msgs[] = {
"Either not yet done, or we haven't come up with an excuse",
"No such Linux system call",
"No equivalent Solaris functionality",
"Unsupported, obsolete system call"
};
/*
 * One entry of the Linux system call dispatch table (sysents[]), which is
 * indexed by Linux system call number.
 */
struct lx_sysent {
/* system call name; NOTE(review): presumably what debug output prints */
char *sy_name;
/*
 * Routine implementing the call.  It is declared with an empty parameter
 * list, so the compiler does not check the number or types of arguments
 * passed through this pointer.
 */
int (*sy_callc)();
/* flags for this call; the dispatcher compares this against SYS_PASSTHRU */
char sy_flags;
/* argument count; the debug formatter handles values 0 through 6 */
char sy_narg;
};
/*
 * Library-wide state.  The non-static flags are visible to the other
 * objects that make up the brand library.
 */
/* Differs for kernel versions, set during lx_init */
static int lx_max_syscall;
/*
 * NOTE(review): presumably the lowest usable stack address, consulted by the
 * "is it safe to alloca()" check -- the code that sets and reads it is not
 * visible here, confirm.
 */
static uintptr_t stack_bottom;
int lx_install = 0; /* install mode enabled if non-zero */
/*
 * NOTE(review): presumably the delay, in seconds, applied to children of an
 * 'rpm' process by the rpm race workaround described later in this file --
 * confirm at the point of use.
 */
int lx_rpm_delay = 1;
int lx_strict = 0; /* "strict" mode enabled if non-zero */
int lx_verbose = 0; /* verbose mode enabled if non-zero */
int lx_debug_enabled = 0; /* debugging output enabled if non-zero */
int
{
return (0);
}
int
{
return (0);
}
static void
{
int i;
char buf[LX_MSG_MAXLEN];
/* LINTED [possible expansion issues] */
if (i == -1)
return;
/* if debugging is enabled, send this message to debug output */
if (lx_debug_enabled != 0)
/*
* If we are trying to print to stderr, we also want to send the
* message to syslog.
*/
if (fd == 2) {
/*
* We let the user choose whether or not to see these
* messages on the console.
*/
if (lx_verbose == 0)
return;
}
/* we retry in case of EINTR */
do {
}
/*PRINTFLIKE1*/
void
{
}
/*
 * Distinctive non-zero exit status used when abort() below is entered a
 * second time, i.e. when a fatal error occurs while we are already in the
 * middle of dying from one.
 */
#define	LX_ERR_FATAL	42

/*
 * Our own custom version of abort(), this routine will be used in place
 * of the one located in libc.  The primary difference is that this version
 * will first reset the signal handler for SIGABRT to SIG_DFL, ensuring the
 * SIGABRT sent causes us to dump core and is not caught by a user program.
 *
 * This function never returns.
 */
void
abort(void)
{
	static int aborting = 0;
	struct sigaction sa;
	sigset_t sigmask;

	/*
	 * Watch out for recursive calls to this function.  If we are already
	 * aborting, something in the sequence below went wrong; give up on
	 * the core dump and leave with the dedicated exit status instead of
	 * looping.
	 */
	if (aborting != 0)
		exit(LX_ERR_FATAL);

	aborting = 1;

	/*
	 * Block all signals here to avoid taking any signals while exiting
	 * in an effort to avoid any strange user interaction with our death.
	 */
	(void) sigfillset(&sigmask);
	(void) sigprocmask(SIG_BLOCK, &sigmask, NULL);

	/*
	 * Our own version of abort(3C) that we know will never call
	 * a user-installed SIGABRT handler first.  We WANT to die.
	 *
	 * Do this by resetting the handler to SIG_DFL, and releasing any
	 * held SIGABRTs.
	 *
	 * If no SIGABRTs are pending, send ourselves one.
	 *
	 * The infinite loop is a bit of overkill, but abort(3C) does it to
	 * assure it never returns so we will as well.
	 */
	(void) sigemptyset(&sa.sa_mask);
	sa.sa_flags = 0;
	sa.sa_handler = SIG_DFL;
	(void) sigaction(SIGABRT, &sa, NULL);

	/* Unblock SIGABRT only; everything else stays blocked. */
	(void) sigemptyset(&sigmask);
	(void) sigaddset(&sigmask, SIGABRT);
	(void) sigprocmask(SIG_UNBLOCK, &sigmask, NULL);

	for (;;)
		(void) kill(getpid(), SIGABRT);

	/*NOTREACHED*/
}
/*PRINTFLIKE1*/
void
{
}
/*
 * Fatal error exit: terminates the process by calling abort().
 *
 * NOTE(review): the visible body never reads msg or the variadic arguments,
 * so nothing is reported before the process dies.  The name and the
 * PRINTFLIKE1 annotation suggest a formatted message is meant to be emitted
 * first -- confirm whether that code is missing.
 */
/*PRINTFLIKE1*/
void
lx_err_fatal(char *msg, ...)
{
abort();
}
/*
* See if it is safe to alloca() sz bytes. Return 1 for yes, 0 for no.
*/
int
{
}
/*
 * Called when the Linux application uses functionality that is not
 * supported.  Per the comments in the body, the message is meant to go to
 * the error stream and, when lx_strict is set, the application is meant to
 * be killed.
 *
 * NOTE(review): neither step is present in the visible body -- there is no
 * statement that emits msg, and the "if (lx_strict)" test has no statement
 * to control.  The function is incomplete as it stands; restore the missing
 * statements before relying on it.
 */
/*PRINTFLIKE1*/
void
lx_unsupported(char *msg, ...)
{
/* send the msg to the error stream */
/*
 * If the user doesn't trust the application to responsibly
 * handle ENOTSUP, we kill the application.
 */
if (lx_strict)
}
static int
{
/*
* If the system call takes 6 args, then libc has stashed them in
* memory at the address contained in %ebx. Except for some syscalls
* which store the 6th argument in %ebp.
*/
sizeof (args[0]) * 6) != 0)
return (-stol_errno[errno]);
} else {
}
return (0);
}
void
{
struct lx_sysent *s;
int syscall_num, ret;
s = &sysents[0];
else
s = &sysents[syscall_num];
goto out;
/*
* If the tracing flag is enabled we call into the brand-specific
* kernel module to handle the tracing activity (DTrace or ptrace).
* It would be tempting to perform DTrace activity in the brand
* module's syscall trap callback, rather than having to return
* to the kernel here, but -- since argument encoding can vary
* according to the specific system call -- that would require
* replicating the knowledge of argument decoding in the kernel
* module as well as here in the brand library.
*/
if (lx_traceflag != 0) {
/*
* Part of the ptrace "interface" is that on syscall entry
* %eax should be reported as -ENOSYS while the orig_eax
* field of the user structure needs to contain the actual
* system call number. If we end up stopping here, the
* controlling process will dig the lx_regs_t structure out of
* our stack.
*/
/*
* The external tracer may have modified the arguments to this
* system call. Refresh the argument cache to account for this.
*/
goto out;
}
goto out;
}
if (lx_debug_enabled != 0) {
const char *fmt;
switch (s->sy_narg) {
case 0:
fmt = "calling %s()";
break;
case 1:
fmt = "calling %s(0x%p)";
break;
case 2:
fmt = "calling %s(0x%p, 0x%p)";
break;
case 3:
fmt = "calling %s(0x%p, 0x%p, 0x%p)";
break;
case 4:
fmt = "calling %s(0x%p, 0x%p, 0x%p, 0x%p)";
break;
case 5:
fmt = "calling %s(0x%p, 0x%p, 0x%p, 0x%p, 0x%p)";
break;
case 6:
fmt = "calling %s(0x%p, 0x%p, 0x%p, 0x%p, 0x%p, 0x%p)";
break;
}
}
/*
* While a %gs of 0 is technically legal (as long as the
* application never dereferences memory using %gs), Solaris
* has its own ideas as to how a zero %gs should be handled in
* _update_sregs(), such that any 32-bit user process with a
* %gs of zero running on a system with a 64-bit kernel will
* have its %gs hidden base register stomped on on return from
* a system call, leaving an incorrect base address in place
* until the next time %gs is actually reloaded (forcing a
* reload of the base address from the appropriate descriptor
* table.)
*
* Of course the kernel will once again stomp on THAT base
* address when returning from a system call, resulting in an
* application segmentation fault.
*
* To avoid this situation, disallow a save of a zero %gs
* here in order to try and capture any Linux process that
* attempts to make a syscall with a zero %gs installed.
*/
(void **)&lx_tsd)) != 0)
"%s: unable to read thread-specific data: %s"),
}
if (s->sy_flags == SYS_PASSTHRU)
else
} else {
/*
* If the return value is between -4096 and 0 we assume it's an
* error, so we translate the Solaris error number into the
* Linux equivalent.
*/
if (-ret >=
sizeof (stol_errno) / sizeof (stol_errno[0])) {
lx_debug("Invalid return value from emulated "
"syscall %d (%s): %d\n",
assert(0);
}
}
}
out:
/*
* %eax holds the return code from the system call.
*/
/*
* If the trace flag is set, bounce into the kernel to let it do
* any necessary tracing (DTrace or ptrace).
*/
if (lx_traceflag != 0) {
}
}
/* Transform the Linux locale name to make it look like a Solaris locale name */
static const char *
{
char *loc;
int i;
return ("C");
return ("");
/* replace the end of the locale name if it's a known pattern */
for (i = 0; i < sizeof (lx_locales) / sizeof (struct lx_locale_ending);
i++) {
continue;
continue; /* don't match */
>= mem_size)
return ("C"); /* size too small for the new name */
return ((const char *)translated_name_mem);
}
/* no match */
return ("");
}
static void
{
return;
return;
if (fd_new == -1)
return;
}
extern int set_l10n_alternate_root(char *path);
/*ARGSUSED*/
int
{
char *r;
int *p, err;
/* Look up the PID that serves as init for this zone */
"Unable to find PID for zone init process: %s"),
/*
* Ubuntu init will fail if its TERM environment variable is not set
* so if we are running init, and TERM is not set, we set term and
* reexec so that the new environment variable is propagated to the
* linux application stack.
*/
}
if ((set_l10n_alternate_root("/native") == 0) &&
sizeof (locale_translated_name))) != NULL) &&
(void) textdomain(TEXT_DOMAIN);
}
/*
* We need to shutdown all libc stdio. libc stdio normally goes to
* file descriptors, but since we're actually part of a linux
* process we don't own these file descriptors and we can't make
* any assumptions about their state.
*/
if (lx_get_kern_version() <= LX_KERN_2_4)
else
r = getenv("LX_RELEASE");
if (r == NULL) {
if (lx_get_kern_version() == LX_KERN_2_6)
sizeof (lx_release));
else
sizeof (lx_release));
} else {
}
/*
* Should we kill an application that attempts an unimplemented
* system call?
*/
lx_strict = 1;
lx_debug("STRICT mode enabled.\n");
}
/*
* Are we in install mode?
*/
lx_install = 1;
lx_debug("INSTALL mode enabled.\n");
}
/*
* Should we attempt to send messages to the screen?
*/
lx_verbose = 1;
lx_debug("VERBOSE mode enabled.\n");
}
lx_debug("branding myself and setting handler to 0x%p",
(void *)lx_handler_table);
/*
* rpm has a race
* condition in it. If it creates a child process to run a
* post-install script, and that child process completes too
* quickly, it will disappear before the parent notices. This
* causes the parent to hang forever waiting for the already dead
* child to die. I'm sure there's a Lazarus joke buried in here
* somewhere.
*
* Anyway, as a workaround, we make every child of an 'rpm' process
* sleep for 1 second, giving the parent a chance to enter its
* wait-for-the-child-to-die loop. That may be the hackiest trick
* in all of our Linux emulation code - and that's saying
* something.
*/
/*
* Register the address of the user-space handler with the lx
* brand module.
*/
/*
* Download data about the lx executable from the kernel.
*/
"failed to get required ELF data from the kernel"));
if (lx_ioctl_init() != 0)
"ioctl");
if (lx_stat_init() != 0)
"stat");
if (lx_statfs_init() != 0)
"statfs");
/*
* Find the aux vector on the stack.
*/
p = (int *)envp;
while (*p != NULL)
p++;
/*
* p is now pointing at the 0 word after the environ pointers. After
* that are the aux vectors.
*/
p++;
case AT_BASE:
break;
case AT_ENTRY:
break;
case AT_PHDR:
break;
case AT_PHENT:
break;
case AT_PHNUM:
break;
default:
break;
}
}
/* Do any thunk server initialization. */
/* Setup signal handler information. */
if (lx_siginit())
"failed to initialize lx signals for the branded process"));
/* Setup thread-specific data area for managing linux threads. */
/* Initialize the thread specific data for this thread. */
"Unable to initialize thread-specific data: %s"),
/*
* Save the current context of this thread.
* We'll restore this context when this thread attempts to exit.
*/
"Unable to initialize thread-specific exit context: %s"),
if (lx_tsd.lxtsd_exit == 0) {
/* lx_runexe() never returns. */
assert(0);
}
/*
* We are here because the Linux application called the exit() or
* exit_group() system call. In turn the brand library did a
* setcontext() to jump to the thread context state we saved above.
*/
else
assert(0);
/*NOTREACHED*/
return (0);
}
/*
* Walk back through the stack until we find the lx_emulate() frame.
*/
lx_syscall_regs(void)
{
/* LINTED - alignment */
}
}
int
{
if (lpid == 0) {
} else {
return (-errno);
/*
* If the returned pid is -1, that indicates we tried to
* look up the PID for init, but that process no longer
* exists.
*/
if (pid == -1)
return (-ESRCH);
}
return (-errno);
return (-errno);
return (0);
}
int
{
}
char *
{
char path_proc[MAXPATHLEN];
int n;
if (fd < 0)
return (NULL);
return (NULL);
return (NULL);
buf[n] = '\0';
return (buf);
}
/*
* Create a translation routine that jumps to a particular emulation
* module syscall.
*/
int \
{ \
int r; \
lx_debug("\tsyscall %d re-vectoring to lx kernel module " \
return ((r == -1) ? -errno : r); \
}
/* The following system calls only exist in kernel 2.6 and greater */
};