s10_brand.c revision 8fd04b8338ed5093ec2d1e668fa620b7de44c177
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <errno.h>
#include <fcntl.h>
#include <dirent.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
#include <thread.h>
#include <sys/auxv.h>
#include <sys/brand.h>
#include <sys/inttypes.h>
#include <sys/lwp.h>
#include <sys/syscall.h>
#include <sys/systm.h>
#include <sys/utsname.h>
#include <sys/systeminfo.h>
#include <sys/zone.h>
#include <sys/stat.h>
#include <sys/mntent.h>
#include <sys/ctfs.h>
#include <sys/priv.h>
#include <sys/acctctl.h>
#include <libgen.h>
#include <bsm/audit.h>
#include <sys/crypto/ioctl.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_ioctl.h>
#include <sys/ucontext.h>
#include <sys/mntio.h>
#include <sys/mnttab.h>
#include <sys/attr.h>
#include <atomic.h>
#include <s10_brand.h>
#include <s10_misc.h>
/*
* Principles of emulation 101.
*
*
* *** Setting errno
*
* Just don't do it. This emulation library is loaded onto a
* separate link map from the application whose address space we're
* running in. We have our own private copy of libc, so therefore
* the errno value accessible from here is also private and changing
* it will not affect any errno value that the process whose address
* space we are running in will see. To return an error condition we
* should return the negated errno value we'd like the system to return.
* For more information about this see the comment in s10_handler().
* Basically, when we return to the caller that initiated the system
* call it's their responsibility to set errno.
*
*
* *** Recursion Considerations
*
* When emulating system calls we need to be very careful about what
* library calls we invoke. Library calls should be kept to a minimum.
* One issue is that library calls can invoke system calls, so if we're
* emulating a system call and we invoke a library call that depends on
* that system call we will probably enter a recursive loop, which would
* be bad.
*
*
* *** Return Values.
*
* When declaring new syscall emulation functions, it is very important
* to set the proper RV_* flags in the s10_sysent_table. Upon failure,
* syscall emulation functions should return an errno value. Upon success
* syscall emulation functions should return 0 and set the sysret_t return
* value parameters accordingly.
*
* There are five possible syscall macro wrappers used in the kernel's system
* call sysent table. These turn into the following return values:
* SYSENT_CL -> SYSENT_C or SYSENT_CI
* SYSENT_C SE_64RVAL RV_DEFAULT
* SYSENT_CI SE_32RVAL1 RV_DEFAULT
* SYSENT_2CI SE_32RVAL1|SE_32RVAL2 RV_32RVAL2
* SYSENT_AP SE_64RVAL RV_64RVAL
*
*
* *** Agent lwp considerations
*
* It is currently impossible to do any emulation for system calls
* when they are being invoked on behalf of an agent lwp. To understand why
* it's impossible you have to understand how agent lwp syscalls work.
*
* The agent lwp syscall process works as follows:
* 1 The controlling process stops the target.
* 2 The controlling process injects an agent lwp which is also stopped.
* This agent lwp assumes the userland stack and register values
* of another stopped lwp in the current process.
* 3 The controlling process configures the agent lwp to start
* executing the requested system call.
* 4 The controlling process configures /proc to stop the agent lwp when
* it enters the requested system call.
* 5 The controlling process allows the agent lwp to start executing.
* 6 The agent lwp traps into the kernel to perform the requested system
* call and immediately stops.
* 7 The controlling process copies all the arguments for the requested
* system call onto the agent lwp's stack.
* 8 The controlling process configures /proc to stop the agent lwp
* when it completes the requested system call.
* 9 The controlling process allows the agent lwp to start executing.
* 10 The agent lwp executes the system call and then stops before returning
* to userland.
* 11 The controlling process copies the return value and return arguments
* back from the agent lwp's stack.
* 12 The controlling process destroys the agent lwp and restarts
* the target process.
*
* The fundamental problem is that when the agent executes the requested
* system call in step 5, if we're emulating that system call then the
* lwp is redirected back to our emulation layer without blocking
* in the kernel. But our emulation layer can't access the arguments
* for the system call because they haven't been copied to the stack
* yet and they still only exist in the controlling process's address
* space. This prevents us from being able to do any emulation of
* agent lwp system calls. Hence, currently our brand trap interposition
* callback (s10_brand_syscall_callback_common) will detect if a system
* call is being made by an agent lwp, and if this is the case it will
* never redirect the system call to this emulation library.
*
* In the future, if this proves to be a problem the easiest solution
* would probably be to replace the branded versions of these applications
* with their native counterparts. Ie, truss, plimit, and pfiles could be
* replaced with wrapper scripts that execute the native versions of these
* applications. In the case of plimit and pfiles this should be pretty
* straightforward. Truss would probably be more tricky since it can
* execute applications which would be branded applications, so in that
* case it might be necessary to create a loadable library which could
* be LD_PRELOADed into truss and this library would interpose on the
* exec() system call to allow truss to correctly execute branded
* processes. It should be pointed out that this solution could work
* because "native agent lwps" (ie, agent lwps created by native
* processes) can be treated differently from "branded agent lwps" (ie,
* agent lwps created by branded processes), since native agent lwps
* would presumably be making native system calls and hence not need
* any interposition.
*
*/
/*
* NOTE(review): presumably the ID of the zone this process runs in; it is
* not assigned in this part of the file -- confirm against its users.
*/
static zoneid_t zoneid;
/*
* NOTE(review): defaults to B_FALSE; presumably selects global-zone
* emulation behavior -- confirm against its users.
*/
static boolean_t emul_global_zone = B_FALSE;
/* Feature bitmap tested by S10_FEATURE_IS_PRESENT(). */
static s10_emul_bitmap_t emul_bitmap;
/* PID of the zone's init process; filled in by get_initpid_info(). */
pid_t zone_init_pid;
/*
* S10_FEATURE_IS_PRESENT is a macro that helps facilitate conditional
* emulation. For each constant N defined in the s10_emulated_features
* enumeration in usr/src/uts/common/brand/solaris10/s10_brand.h,
* S10_FEATURE_IS_PRESENT(N) is true iff the feature/backport represented by N
* is present in the Solaris 10 image hosted within the zone. In other words,
* S10_FEATURE_IS_PRESENT(N) is true iff the file /usr/lib/brand/solaris10/M,
* where M is the enum value of N, was present in the zone when the zone booted.
*
*
* *** Sample Usage
*
* Suppose that you need to backport a fix to Solaris 10 and there is
* emulation in place for the fix. Suppose further that the emulation won't be
* needed if the fix is backported (i.e., if the fix is present in the hosted
* Solaris 10 environment, then the brand won't need the emulation). Then if
* you add a constant named "S10_FEATURE_X" to the end of the
* s10_emulated_features enumeration that represents the backported fix and
* S10_FEATURE_X evaluates to four, then you should create a file named
* /usr/lib/brand/solaris10/4 as part of your backport. Additionally, you
* should retain the aforementioned emulation but modify it so that it's
* performed only when S10_FEATURE_IS_PRESENT(S10_FEATURE_X) is false. Thus the
* emulation function should look something like the following:
*
* static int
* my_emul_function(sysret_t *rv, ...)
* {
* if (S10_FEATURE_IS_PRESENT(S10_FEATURE_X)) {
* // Don't emulate
* return (__systemcall(rv, ...));
* } else {
* // Emulate whatever needs to be emulated when the
* // backport isn't present in the Solaris 10 image.
* }
* }
*/
/*
* Feature N is recorded as bit (N & 0x7) of byte (N >> 3) in emul_bitmap;
* the macro evaluates to nonzero iff that bit is set. The argument is
* evaluated twice, so it must not have side effects.
*/
#define S10_FEATURE_IS_PRESENT(s10_emulated_features_constant) \
((emul_bitmap[(s10_emulated_features_constant) >> 3] & \
(1 << ((s10_emulated_features_constant) & 0x7))) != 0)
/* Initializer for one s10_sysent_table_t entry: { callback, args/flags }. */
#define EMULATE(cb, args) { (sysent_cb_t)(cb), (args) }
/* Table entry that routes the system call to s10_unimpl(). */
#define NOSYS EMULATE(s10_unimpl, (0 | RV_DEFAULT))
/*
* Emulation callback type. The parameter list is deliberately left
* unspecified so that callbacks taking different argument counts can share
* one table.
*/
typedef long (*sysent_cb_t)();
/*
* One entry of the emulation table: the callback plus a word that
* s10_indir() masks with NARGS_MASK to obtain the argument count.
*/
typedef struct s10_sysent_table {
sysent_cb_t st_callc;
uintptr_t st_args;
} s10_sysent_table_t;
/* Tentative definition; the initialized table is not in this part of the file. */
s10_sysent_table_t s10_sysent_table[];
/* NOTE(review): presumably the uname release/version strings reported to S10 processes -- confirm against their users. */
#define S10_UTS_RELEASE "5.10"
#define S10_UTS_VERSION "Generic_Virtual"
/*
* Diagnostic state written by _s10_abort() just before it kills the process.
* Nothing in this file reads these back (hence the lint annotations); they
* are volatile so the stores are not optimized away.
* NOTE(review): presumably meant to be inspected from a core file or
* debugger -- confirm.
*/
/*LINTED: static unused*/
static volatile int s10_abort_err;
/*LINTED: static unused*/
static volatile const char *s10_abort_msg;
/*LINTED: static unused*/
static volatile const char *s10_abort_file;
/*LINTED: static unused*/
static volatile int s10_abort_line;
extern int errno;
/*
 * Record why we are dying in the s10_abort_* globals, then terminate the
 * process: first via abort(), and if that somehow returns, by sending
 * SIGKILL to the calling lwp.
 *
 *	err	error value to record
 *	msg	message to record
 *	file	source file name to record
 *	line	source line number to record
 */
/*ARGSUSED*/
void
_s10_abort(int err, const char *msg, const char *file, int line)
{
	sysret_t kill_rv;

	/* Stash the diagnostics where they can be found afterwards. */
	s10_abort_err = err;
	s10_abort_msg = msg;
	s10_abort_file = file;
	s10_abort_line = line;

	/* The normal way out. */
	abort();

	/* Still here?  Fall back to an uncatchable signal. */
	(void) __systemcall(&kill_rv, SYS_lwp_kill + 1024, _lwp_self(),
	    SIGKILL);
}
/*
 * Copy size bytes from 'from' to 'to' by issuing the uucopy system call
 * (SYS_uucopy + 1024).
 *
 * Returns 0 on success.  Any failure of the system call is reported as
 * EFAULT, regardless of the error the call itself returned.
 */
int
s10_uucopy(const void *from, void *to, size_t size)
{
	sysret_t ignored_rv;
	int sys_err;

	sys_err = __systemcall(&ignored_rv, SYS_uucopy + 1024, from, to,
	    size);
	return (sys_err != 0 ? EFAULT : 0);
}
/*
 * String flavor of s10_uucopy(): issues the uucopystr system call
 * (SYS_uucopystr + 1024) with the given source, destination and size.
 *
 * ATTENTION: uucopystr() does NOT ensure that strings are null terminated!
 *
 * Returns 0 on success.  Any failure of the system call is reported as
 * EFAULT, regardless of the error the call itself returned.
 */
int
s10_uucopystr(const void *from, void *to, size_t size)
{
	sysret_t ignored_rv;
	int sys_err;

	sys_err = __systemcall(&ignored_rv, SYS_uucopystr + 1024, from, to,
	    size);
	return (sys_err != 0 ? EFAULT : 0);
}
/*
 * Look up the PID of the zone's init process and store it in zone_init_pid.
 *
 * Returns B_TRUE if the calling process currently has that PID (i.e. right
 * now we are init), B_FALSE otherwise.  Aborts via s10_abort() if the brand
 * system call fails.
 */
static boolean_t
get_initpid_info(void)
{
	pid_t self_pid;
	sysret_t rv;
	int err;

	/*
	 * Both our own PID and init's PID come from a single brand call.
	 * getpid() is deliberately avoided: we must not call it before the
	 * program is fully linked, because the first getpid() call is how
	 * the linker tells debuggers that linking has completed.
	 */
	err = __systemcall(&rv, SYS_brand, B_S10_PIDINFO, &self_pid,
	    &zone_init_pid);
	if (err != 0) {
		s10_abort(err, "Failed to get init's pid");
	}

	/*
	 * Our own PID is only good for this comparison.  It must not be
	 * cached as a stand-in for getpid() since we might fork(2) later,
	 * so only zone_init_pid is kept.
	 */
	return (self_pid == zone_init_pid ? B_TRUE : B_FALSE);
}
/*
 * Handler behind the NOSYS table entries.  The kernel never redirects NOSYS
 * system calls to us because they are not enabled there, so the only way to
 * get here is via the indirect system call path.
 *
 * Sends SIGSYS to the calling lwp and returns ENOSYS.
 */
/*ARGSUSED*/
static long
s10_unimpl(sysret_t *rv, uintptr_t p1)
{
	sysret_t kill_rv;

	/*
	 * An "unsupported syscall" message would be nice, but there is no
	 * safe place to print it: stderr/STDERR_FILENO may not be a
	 * terminal at all, and writing there could inadvertently scribble
	 * on one of the application's own open files.
	 *
	 * An application that makes an invalid system call normally gets a
	 * SIGSYS, so we send ourselves one.  This is not ideal: a handler
	 * registered by the application may receive a ucontext_t as its
	 * third argument describing where the signal was generated, and
	 * here that context will not be the one the application expects.
	 * A brandsys() kernel function that delivers the signal with the
	 * correct ucontext_t would be the proper fix.
	 */
	(void) __systemcall(&kill_rv, SYS_lwp_kill + 1024, _lwp_self(),
	    SIGSYS);

	return (ENOSYS);
}
#if defined(__sparc) && !defined(__sparcv9)
/*
 * Yuck. For 32-bit sparc applications, handle indirect system calls.
 * Note that we declare this interface to use the maximum number of
 * system call arguments. If we receive a system call that uses fewer
 * arguments, then the additional arguments will be garbage, but they
 * will also be ignored so that should be ok.
 *
 *	rv	return-value structure handed through to the callback
 *	code	index into s10_sysent_table of the call being made
 *	a0-a7	the (up to eight) system call arguments
 *
 * Returns whatever the selected emulation callback returns.  Aborts if
 * code is out of range or the table entry has an argument count above 8.
 */
static long
s10_indir(sysret_t *rv, int code,
    uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4,
    uintptr_t a5, uintptr_t a6, uintptr_t a7)
{
	s10_sysent_table_t *sst;

	/* Validate the index before forming a pointer into the table. */
	s10_assert(code >= 0 && code < NSYSCALL);
	sst = &(s10_sysent_table[code]);

	/* Every callback takes rv exactly once, followed by its arguments. */
	switch (sst->st_args & NARGS_MASK) {
	case 0:
		return ((sst->st_callc)(rv));
	case 1:
		return ((sst->st_callc)(rv, a0));
	case 2:
		return ((sst->st_callc)(rv, a0, a1));
	case 3:
		return ((sst->st_callc)(rv, a0, a1, a2));
	case 4:
		return ((sst->st_callc)(rv, a0, a1, a2, a3));
	case 5:
		return ((sst->st_callc)(rv, a0, a1, a2, a3, a4));
	case 6:
		return ((sst->st_callc)(rv, a0, a1, a2, a3, a4, a5));
	case 7:
		return ((sst->st_callc)(rv, a0, a1, a2, a3, a4, a5, a6));
	case 8:
		return ((sst->st_callc)(rv, a0, a1, a2, a3, a4, a5, a6, a7));
	}

	s10_abort(0, "invalid entry in s10_sysent_table");
	return (EINVAL);
}
#endif /* __sparc && !__sparcv9 */
/*
 * Release the thread-local storage handed out by mntfs_get_mntentbuf(): the
 * struct mntentbuf itself plus the two buffers it points to.  Registered as
 * the destructor for the thread-specific-data key, and also called directly
 * on allocation failure.  A NULL argument is ignored.
 */
static void
mntfs_free_mntentbuf(void *arg)
{
	struct mntentbuf *mb = arg;

	if (mb != NULL) {
		/* free(NULL) is a no-op, so unset members need no guard. */
		free(mb->mbuf_emp);
		free(mb->mbuf_buf);
		bzero(mb, sizeof (*mb));
		free(mb);
	}
}
/*
 * Provide the thread-local storage required by mntfs_ioctl().
 *
 * Returns the calling thread's struct mntentbuf, creating it on first use,
 * with a text buffer (mbuf_buf) of at least 'size' bytes.  The buffer only
 * ever grows; when it must grow the old contents are discarded.
 *
 * Returns NULL if the key cannot be created, the thread-specific value
 * cannot be read or set, or any allocation fails.
 */
static struct mntentbuf *
mntfs_get_mntentbuf(size_t size)
{
	static mutex_t keylock;
	static thread_key_t key;
	static int once_per_keyname = 0;
	void *tsd = NULL;
	struct mntentbuf *embufp;

	/*
	 * Create the key the first time through.  The flag is tested again
	 * under keylock so that only one thread creates it.
	 */
	if (once_per_keyname == 0) {
		int keyerr = 0;

		(void) mutex_lock(&keylock);
		if (once_per_keyname == 0) {
			keyerr = thr_keycreate(&key, mntfs_free_mntentbuf);
			if (keyerr == 0)
				once_per_keyname++;
		}
		(void) mutex_unlock(&keylock);

		if (keyerr != 0)
			return (NULL);
	}

	/*
	 * The value stored under the key is the address of this thread's
	 * struct mntentbuf.  If there isn't one yet, allocate the struct
	 * and its struct extmnttab and attach it to the thread, undoing
	 * everything if any step fails.
	 */
	if (thr_getspecific(key, &tsd) != 0)
		return (NULL);

	if (tsd != NULL) {
		embufp = tsd;
	} else {
		embufp = calloc(1, sizeof (struct mntentbuf));
		if (embufp == NULL)
			return (NULL);

		embufp->mbuf_emp = malloc(sizeof (struct extmnttab));
		if (embufp->mbuf_emp == NULL ||
		    thr_setspecific(key, embufp) != 0) {
			mntfs_free_mntentbuf(embufp);
			return (NULL);
		}
	}

	/* Grow the text buffer if the caller needs more than we have. */
	if (size > embufp->mbuf_bufsize) {
		free(embufp->mbuf_buf);
		embufp->mbuf_buf = malloc(size);
		if (embufp->mbuf_buf == NULL) {
			embufp->mbuf_bufsize = 0;
			return (NULL);
		}
		embufp->mbuf_bufsize = size;
	}

	return (embufp);
}
/*
* The MNTIOC_GETMNTENT command in this release differs from that in early
* versions of Solaris 10.
*
* Previously, the command would copy a pointer to a struct extmnttab to an
* address provided as an argument. The pointer would be somewhere within a
* mapping already present within the user's address space. In addition, the
* text to which the struct's members pointed would also be within a
* pre-existing mapping. Now, the user is required to allocate memory for both
* the struct and the text buffer, and to pass the address of each within a
* struct mntentbuf. In order to conceal these details from a Solaris 10 client
* we allocate some thread-local storage in which to create the necessary data
* structures; this is static, thread-safe memory that will be cleaned up
* without the caller's intervention.
*
* MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY are new in this release; they should
* not work for older clients.
*/
int
mntfs_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
{
	/* Buffer size to request; shared by all threads and only doubled. */
	static size_t bufsize = MNT_LINE_MAX;
	struct mntentbuf *embufp;
	struct stat statbuf;
	int err;

	/* Up-to-date clients already speak the new protocol: pass through. */
	if (S10_FEATURE_IS_PRESENT(S10_FEATURE_ALTERED_MNTFS_IOCTL))
		return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));

	/* Anything not aimed at a mntfs file is passed through as well. */
	err = __systemcall(rval, SYS_fstatat + 1024, fdes, NULL, &statbuf, 0);
	if (err != 0)
		return (err);
	if (strcmp(statbuf.st_fstype, MNTTYPE_MNTFS) != 0)
		return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));

	/* The commands that are new in this release are refused. */
	if (cmd == MNTIOC_GETEXTMNTENT || cmd == MNTIOC_GETMNTANY)
		return (EINVAL);

	embufp = mntfs_get_mntentbuf(bufsize);
	if (embufp == NULL)
		return (ENOMEM);

	/*
	 * MNTIOC_GETEXTMNTENT moves the file pointer on as soon as it has
	 * copied its result out, and the address we hand it always works.
	 * So the caller's address has to be proven writable up front, before
	 * the ioctl consumes an entry.
	 */
	if (s10_uucopy(&embufp->mbuf_emp, (void *)arg, sizeof (void *)) != 0)
		return (EFAULT);

	/*
	 * Issue the real ioctl, doubling the buffer and retrying for as
	 * long as the only complaint is that the buffer was too small.
	 */
	while ((err = __systemcall(rval, SYS_ioctl + 1024, fdes,
	    MNTIOC_GETEXTMNTENT, embufp)) == 0 &&
	    rval->sys_rval1 == MNTFS_TOOLONG) {
		(void) atomic_swap_ulong((unsigned long *)&bufsize,
		    2 * embufp->mbuf_bufsize);
		embufp = mntfs_get_mntentbuf(bufsize);
		if (embufp == NULL)
			return (ENOMEM);
	}
	if (err != 0)
		return (err);

	/* Hand the caller the address of our struct extmnttab. */
	if (s10_uucopy(&embufp->mbuf_emp, (void *)arg, sizeof (void *)) != 0)
		return (EFAULT);

	return (0);
}
/*
* Assign the structure member value from the s (source) structure to the
* d (dest) structure.
*
* val is pasted after "." on both sides, so it may be a nested member path
* such as fl_list.fl_digest. d and s are structure objects, not pointers.
*/
#define struct_assign(d, s, val) (((d).val) = ((s).val))
/*
* The CRYPTO_GET_FUNCTION_LIST parameter structure crypto_function_list_t
* changed between S10 and Nevada, so we have to emulate the old S10
* crypto_function_list_t structure when interposing on the ioctl syscall.
*
* crypto_ioctl() copies this structure to and from the S10 caller's memory
* as raw bytes, so the order and types of the members below are part of the
* interface and must not be changed.
*/
typedef struct s10_crypto_function_list {
boolean_t fl_digest_init;
boolean_t fl_digest;
boolean_t fl_digest_update;
boolean_t fl_digest_key;
boolean_t fl_digest_final;
boolean_t fl_encrypt_init;
boolean_t fl_encrypt;
boolean_t fl_encrypt_update;
boolean_t fl_encrypt_final;
boolean_t fl_decrypt_init;
boolean_t fl_decrypt;
boolean_t fl_decrypt_update;
boolean_t fl_decrypt_final;
boolean_t fl_mac_init;
boolean_t fl_mac;
boolean_t fl_mac_update;
boolean_t fl_mac_final;
boolean_t fl_sign_init;
boolean_t fl_sign;
boolean_t fl_sign_update;
boolean_t fl_sign_final;
boolean_t fl_sign_recover_init;
boolean_t fl_sign_recover;
boolean_t fl_verify_init;
boolean_t fl_verify;
boolean_t fl_verify_update;
boolean_t fl_verify_final;
boolean_t fl_verify_recover_init;
boolean_t fl_verify_recover;
boolean_t fl_digest_encrypt_update;
boolean_t fl_decrypt_digest_update;
boolean_t fl_sign_encrypt_update;
boolean_t fl_decrypt_verify_update;
boolean_t fl_seed_random;
boolean_t fl_generate_random;
boolean_t fl_session_open;
boolean_t fl_session_close;
boolean_t fl_session_login;
boolean_t fl_session_logout;
boolean_t fl_object_create;
boolean_t fl_object_copy;
boolean_t fl_object_destroy;
boolean_t fl_object_get_size;
boolean_t fl_object_get_attribute_value;
boolean_t fl_object_set_attribute_value;
boolean_t fl_object_find_init;
boolean_t fl_object_find;
boolean_t fl_object_find_final;
boolean_t fl_key_generate;
boolean_t fl_key_generate_pair;
boolean_t fl_key_wrap;
boolean_t fl_key_unwrap;
boolean_t fl_key_derive;
boolean_t fl_init_token;
boolean_t fl_init_pin;
boolean_t fl_set_pin;
boolean_t prov_is_hash_limited;
uint32_t prov_hash_threshold;
uint32_t prov_hash_limit;
} s10_crypto_function_list_t;
/*
* S10 form of the CRYPTO_GET_FUNCTION_LIST ioctl argument. crypto_ioctl()
* reads fl_provider_id from the caller's copy and writes every member back.
*/
typedef struct s10_crypto_get_function_list {
uint_t fl_return_value;
crypto_provider_id_t fl_provider_id;
s10_crypto_function_list_t fl_list;
} s10_crypto_get_function_list_t;
/*
 * The structure returned by the CRYPTO_GET_FUNCTION_LIST ioctl on
 * /dev/crypto grew between S10 and Nevada because of:
 *	6482533 Threshold for HW offload via PKCS11 interface
 * Emulation is a simple matter of issuing the ioctl with the Nevada
 * structure and copying its fields into the S10 one.
 *
 * We stat the descriptor to make sure the ioctl is really meant for
 * /dev/crypto; if it is not (or /dev/crypto cannot be stat'ed), the ioctl
 * is passed through untouched.
 *
 * Returns 0 on success or an errno value on failure.
 */
static int
crypto_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
{
	/* Major number of /dev/crypto, looked up once and then cached. */
	static dev_t crypto_dev = (dev_t)-1;
	s10_crypto_get_function_list_t s10_fl;
	crypto_get_function_list_t native_fl;
	struct stat sbuf;
	int err;

/* Copy one fl_list member from the native structure to the S10 one. */
#define	CRYPTO_FL_COPY(member)	\
	struct_assign(s10_fl, native_fl, fl_list.member)

	if (crypto_dev == (dev_t)-1) {
		err = __systemcall(rval, SYS_fstatat + 1024, AT_FDCWD,
		    "/dev/crypto", &sbuf, 0);
		if (err != 0) {
			/* No /dev/crypto to compare with: don't emulate. */
			return (__systemcall(rval, SYS_ioctl + 1024, fdes,
			    cmd, arg));
		}
		crypto_dev = major(sbuf.st_rdev);
	}

	err = __systemcall(rval, SYS_fstatat + 1024, fdes, NULL, &sbuf, 0);
	if (err != 0)
		return (err);

	/*
	 * Each open fd of /dev/crypto gets a new minor device, so only the
	 * major number can be compared.
	 */
	if (major(sbuf.st_rdev) != crypto_dev)
		return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));

	/* Only the provider id travels from the caller to the kernel. */
	if (s10_uucopy((const void *)arg, &s10_fl, sizeof (s10_fl)) != 0)
		return (EFAULT);
	struct_assign(native_fl, s10_fl, fl_provider_id);

	err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd, &native_fl);
	if (err != 0)
		return (err);

	/* Translate the native answer into the S10 layout. */
	struct_assign(s10_fl, native_fl, fl_return_value);
	struct_assign(s10_fl, native_fl, fl_provider_id);

	CRYPTO_FL_COPY(fl_digest_init);
	CRYPTO_FL_COPY(fl_digest);
	CRYPTO_FL_COPY(fl_digest_update);
	CRYPTO_FL_COPY(fl_digest_key);
	CRYPTO_FL_COPY(fl_digest_final);

	CRYPTO_FL_COPY(fl_encrypt_init);
	CRYPTO_FL_COPY(fl_encrypt);
	CRYPTO_FL_COPY(fl_encrypt_update);
	CRYPTO_FL_COPY(fl_encrypt_final);

	CRYPTO_FL_COPY(fl_decrypt_init);
	CRYPTO_FL_COPY(fl_decrypt);
	CRYPTO_FL_COPY(fl_decrypt_update);
	CRYPTO_FL_COPY(fl_decrypt_final);

	CRYPTO_FL_COPY(fl_mac_init);
	CRYPTO_FL_COPY(fl_mac);
	CRYPTO_FL_COPY(fl_mac_update);
	CRYPTO_FL_COPY(fl_mac_final);

	CRYPTO_FL_COPY(fl_sign_init);
	CRYPTO_FL_COPY(fl_sign);
	CRYPTO_FL_COPY(fl_sign_update);
	CRYPTO_FL_COPY(fl_sign_final);
	CRYPTO_FL_COPY(fl_sign_recover_init);
	CRYPTO_FL_COPY(fl_sign_recover);

	CRYPTO_FL_COPY(fl_verify_init);
	CRYPTO_FL_COPY(fl_verify);
	CRYPTO_FL_COPY(fl_verify_update);
	CRYPTO_FL_COPY(fl_verify_final);
	CRYPTO_FL_COPY(fl_verify_recover_init);
	CRYPTO_FL_COPY(fl_verify_recover);

	CRYPTO_FL_COPY(fl_digest_encrypt_update);
	CRYPTO_FL_COPY(fl_decrypt_digest_update);
	CRYPTO_FL_COPY(fl_sign_encrypt_update);
	CRYPTO_FL_COPY(fl_decrypt_verify_update);

	CRYPTO_FL_COPY(fl_seed_random);
	CRYPTO_FL_COPY(fl_generate_random);

	CRYPTO_FL_COPY(fl_session_open);
	CRYPTO_FL_COPY(fl_session_close);
	CRYPTO_FL_COPY(fl_session_login);
	CRYPTO_FL_COPY(fl_session_logout);

	CRYPTO_FL_COPY(fl_object_create);
	CRYPTO_FL_COPY(fl_object_copy);
	CRYPTO_FL_COPY(fl_object_destroy);
	CRYPTO_FL_COPY(fl_object_get_size);
	CRYPTO_FL_COPY(fl_object_get_attribute_value);
	CRYPTO_FL_COPY(fl_object_set_attribute_value);
	CRYPTO_FL_COPY(fl_object_find_init);
	CRYPTO_FL_COPY(fl_object_find);
	CRYPTO_FL_COPY(fl_object_find_final);

	CRYPTO_FL_COPY(fl_key_generate);
	CRYPTO_FL_COPY(fl_key_generate_pair);
	CRYPTO_FL_COPY(fl_key_wrap);
	CRYPTO_FL_COPY(fl_key_unwrap);
	CRYPTO_FL_COPY(fl_key_derive);

	CRYPTO_FL_COPY(fl_init_token);
	CRYPTO_FL_COPY(fl_init_pin);
	CRYPTO_FL_COPY(fl_set_pin);

	CRYPTO_FL_COPY(prov_is_hash_limited);
	CRYPTO_FL_COPY(prov_hash_threshold);
	CRYPTO_FL_COPY(prov_hash_limit);

#undef	CRYPTO_FL_COPY

	return (s10_uucopy(&s10_fl, (void *)arg, sizeof (s10_fl)));
}
/*
* The process contract CT_TGET and CT_TSET parameter structure ct_param_t
* changed between S10 and Nevada, so we have to emulate the old S10
* ct_param_t structure when interposing on the ioctl syscall.
*
* ctfs_ioctl() copies this structure to and from the caller's memory as raw
* bytes, so its layout must not be changed.
*/
typedef struct s10_ct_param {
uint32_t ctpm_id; /* copied into the native ctpm_id */
uint32_t ctpm_pad; /* not referenced by ctfs_ioctl() */
uint64_t ctpm_value; /* value storage the native ctpm_value points at */
} s10_ct_param_t;
/*
 * Process contract ioctls have to be emulated for init(1M) because the
 * parameter structure changed between S10 and Nevada.  The work is simple:
 * build a Nevada ct_param_t that points at the value held in the caller's
 * S10 structure and issue the native ioctl with it.
 *
 * For now every CT_TGET and CT_TSET consumer is assumed to need this.  The
 * descriptor is stat'ed first so that ioctls not aimed at the contract file
 * system are passed through untouched.
 *
 * Returns 0 on success or an errno value on failure.
 */
static int
ctfs_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
{
	s10_ct_param_t s10param;
	ct_param_t param;
	struct stat statbuf;
	int err;

	err = __systemcall(rval, SYS_fstatat + 1024, fdes, NULL, &statbuf, 0);
	if (err != 0)
		return (err);

	/* Not a ctfs file: nothing to translate. */
	if (strcmp(statbuf.st_fstype, MNTTYPE_CTFS) != 0)
		return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));

	if (s10_uucopy((const void *)arg, &s10param, sizeof (s10param)) != 0)
		return (EFAULT);

	/* The native structure refers to the value by address and size. */
	param.ctpm_id = s10param.ctpm_id;
	param.ctpm_size = sizeof (uint64_t);
	param.ctpm_value = &s10param.ctpm_value;

	err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd, &param);
	if (err != 0)
		return (err);

	/* Only a get has anything to hand back to the caller. */
	if (cmd != CT_TGET)
		return (0);

	return (s10_uucopy(&s10param, (void *)arg, sizeof (s10param)));
}
/*
* S10 layout of the ZFS ioctl argument. zfs_ioctl() copies this structure to
* and from the caller's memory as raw bytes and translates it member by
* member to and from the native zfs_cmd_t, so its layout must not be changed.
*/
typedef struct s10_zfs_cmd {
char zc_name[MAXPATHLEN];
char zc_value[MAXPATHLEN * 2];
char zc_string[MAXNAMELEN];
uint64_t zc_guid;
uint64_t zc_nvlist_conf; /* really (char *) */
uint64_t zc_nvlist_conf_size;
uint64_t zc_nvlist_src; /* really (char *) */
uint64_t zc_nvlist_src_size;
uint64_t zc_nvlist_dst; /* really (char *) */
uint64_t zc_nvlist_dst_size;
uint64_t zc_cookie;
uint64_t zc_objset_type;
uint64_t zc_perm_action;
uint64_t zc_history; /* really (char *) */
uint64_t zc_history_len;
uint64_t zc_history_offset;
uint64_t zc_obj;
/* Solaris Next added zc_iflags member here */
zfs_share_t zc_share;
dmu_objset_stats_t zc_objset_stats;
struct drr_begin zc_begin_record;
zinject_record_t zc_inject_record;
} s10_zfs_cmd_t;
/*
 * There is a difference in the zfs_cmd_t ioctl parameter between S10 and
 * Solaris Next so we need to translate between the two structures when
 * making ZFS ioctls.
 *
 * The descriptor is stat'ed and its major number compared with that of
 * /dev/zfs; if it does not match (or /dev/zfs cannot be stat'ed) the ioctl
 * is passed through untouched.  Otherwise the caller's S10 structure is
 * converted to the native one (with zc_iflags set to 0), the ioctl is
 * issued, and the result is converted back and copied out whether or not
 * the ioctl succeeded.
 *
 * Returns the result of the ioctl system call, the error from stat'ing the
 * descriptor, or EFAULT if the caller's structure cannot be read.  A
 * failure to copy the result back out is not reported.
 */
static int
zfs_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
{
	int err;
	s10_zfs_cmd_t s10_param;
	zfs_cmd_t native_param;
	/* Major number of /dev/zfs, looked up once and then cached. */
	static dev_t zfs_dev = (dev_t)-1;
	struct stat sbuf;

	if (zfs_dev == (dev_t)-1) {
		if ((err = __systemcall(rval, SYS_fstatat + 1024,
		    AT_FDCWD, "/dev/zfs", &sbuf, 0)) != 0)
			goto nonemuioctl;
		zfs_dev = major(sbuf.st_rdev);
	}
	if ((err = __systemcall(rval, SYS_fstatat + 1024,
	    fdes, NULL, &sbuf, 0)) != 0)
		return (err);
	if (major(sbuf.st_rdev) != zfs_dev)
		goto nonemuioctl;

	if (s10_uucopy((const void *)arg, &s10_param, sizeof (s10_param)) != 0)
		return (EFAULT);

	/* S10 -> native. */
	bcopy((const void *)s10_param.zc_name, (void *)native_param.zc_name,
	    sizeof (s10_param.zc_name));
	bcopy((const void *)s10_param.zc_value, (void *)native_param.zc_value,
	    sizeof (s10_param.zc_value));
	bcopy((const void *)s10_param.zc_string, (void *)native_param.zc_string,
	    sizeof (s10_param.zc_string));
	struct_assign(native_param, s10_param, zc_guid);
	struct_assign(native_param, s10_param, zc_nvlist_conf);
	struct_assign(native_param, s10_param, zc_nvlist_conf_size);
	struct_assign(native_param, s10_param, zc_nvlist_src);
	struct_assign(native_param, s10_param, zc_nvlist_src_size);
	struct_assign(native_param, s10_param, zc_nvlist_dst);
	struct_assign(native_param, s10_param, zc_nvlist_dst_size);
	struct_assign(native_param, s10_param, zc_cookie);
	struct_assign(native_param, s10_param, zc_objset_type);
	struct_assign(native_param, s10_param, zc_perm_action);
	struct_assign(native_param, s10_param, zc_history);
	struct_assign(native_param, s10_param, zc_history_len);
	struct_assign(native_param, s10_param, zc_history_offset);
	struct_assign(native_param, s10_param, zc_obj);
	/* The S10 structure has no zc_iflags member. */
	native_param.zc_iflags = 0;
	struct_assign(native_param, s10_param, zc_share);
	struct_assign(native_param, s10_param, zc_objset_stats);
	struct_assign(native_param, s10_param, zc_begin_record);
	struct_assign(native_param, s10_param, zc_inject_record);

	err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd, &native_param);

	/* native -> S10, done even if the ioctl failed. */
	bcopy((const void *)native_param.zc_name, (void *)s10_param.zc_name,
	    sizeof (s10_param.zc_name));
	bcopy((const void *)native_param.zc_value, (void *)s10_param.zc_value,
	    sizeof (s10_param.zc_value));
	bcopy((const void *)native_param.zc_string, (void *)s10_param.zc_string,
	    sizeof (s10_param.zc_string));
	struct_assign(s10_param, native_param, zc_guid);
	struct_assign(s10_param, native_param, zc_nvlist_conf);
	struct_assign(s10_param, native_param, zc_nvlist_conf_size);
	struct_assign(s10_param, native_param, zc_nvlist_src);
	struct_assign(s10_param, native_param, zc_nvlist_src_size);
	struct_assign(s10_param, native_param, zc_nvlist_dst);
	struct_assign(s10_param, native_param, zc_nvlist_dst_size);
	struct_assign(s10_param, native_param, zc_cookie);
	struct_assign(s10_param, native_param, zc_objset_type);
	struct_assign(s10_param, native_param, zc_perm_action);
	struct_assign(s10_param, native_param, zc_history);
	struct_assign(s10_param, native_param, zc_history_len);
	struct_assign(s10_param, native_param, zc_history_offset);
	struct_assign(s10_param, native_param, zc_obj);
	struct_assign(s10_param, native_param, zc_share);
	struct_assign(s10_param, native_param, zc_objset_stats);
	struct_assign(s10_param, native_param, zc_begin_record);
	struct_assign(s10_param, native_param, zc_inject_record);

	/* Best effort: the ioctl's own result is what the caller gets. */
	(void) s10_uucopy(&s10_param, (void *)arg, sizeof (s10_param));
	return (err);

nonemuioctl:
	return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
}
/*
 * ioctl emulation entry point.  Commands whose argument structures differ
 * between S10 and the native system are routed to the matching emulation
 * routine; every other command goes straight to the ioctl system call.
 *
 * Returns whatever the selected handler or the system call returns.
 */
int
s10_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
{
	/* /dev/crypto function list. */
	if (cmd == CRYPTO_GET_FUNCTION_LIST)
		return (crypto_ioctl(rval, fdes, cmd, arg));

	/* Process contract template get/set. */
	if (cmd == CT_TGET || cmd == CT_TSET)
		return (ctfs_ioctl(rval, fdes, cmd, arg));

	/* mntfs entry retrieval. */
	if (cmd == MNTIOC_GETMNTENT || cmd == MNTIOC_GETEXTMNTENT ||
	    cmd == MNTIOC_GETMNTANY)
		return (mntfs_ioctl(rval, fdes, cmd, arg));

	/* ZFS commands are recognized by the ZFS_IOC value in bits 8-15. */
	if ((cmd & 0xff00) == ZFS_IOC)
		return (zfs_ioctl(rval, fdes, cmd, arg));

	return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
}
/*
* Unfortunately, pwrite()'s behavior differs between S10 and Nevada when
* applied to files opened with O_APPEND. The offset argument is ignored and
* the buffer is appended to the target file in S10, whereas the current file
* position is ignored in Nevada (i.e., pwrite() acts as though the target file
* wasn't opened with O_APPEND). This is a result of the fix for CR 6655660
* (pwrite() must ignore the O_APPEND/FAPPEND flag).
*
* We emulate the old S10 pwrite() behavior by checking whether the target file
* was opened with O_APPEND. If it was, then invoke the write() system call
* instead of pwrite(); otherwise, invoke the pwrite() system call as usual.
*/
static int
s10_pwrite(sysret_t *rval, int fd, const void *bufferp, size_t num_bytes,
    off_t offset)
{
	int rc;

	/* Fetch the descriptor's file status flags into rval->sys_rval1. */
	rc = __systemcall(rval, SYS_fcntl + 1024, fd, F_GETFL);
	if (rc != 0)
		return (rc);

	if ((rval->sys_rval1 & O_APPEND) == 0) {
		/* Not opened for append: the native pwrite() is correct. */
		return (__systemcall(rval, SYS_pwrite + 1024, fd, bufferp,
		    num_bytes, offset));
	}

	/* Opened for append: drop the offset and issue a plain write(). */
	return (__systemcall(rval, SYS_write + 1024, fd, bufferp, num_bytes));
}
#if !defined(_LP64)
/*
* This is the large file version of the pwrite() system call for 32-bit
* processes. This exists for the same reason that s10_pwrite() exists; see
* the comment above s10_pwrite().
*/
static int
s10_pwrite64(sysret_t *rval, int fd, const void *bufferp, size32_t num_bytes,
    uint32_t offset_1, uint32_t offset_2)
{
	int rc;

	/* Fetch the descriptor's file status flags into rval->sys_rval1. */
	rc = __systemcall(rval, SYS_fcntl + 1024, fd, F_GETFL);
	if (rc != 0)
		return (rc);

	if ((rval->sys_rval1 & O_APPEND) == 0) {
		/*
		 * Not opened for append: pass both offset words through to
		 * the native pwrite64() untouched.
		 */
		return (__systemcall(rval, SYS_pwrite64 + 1024, fd, bufferp,
		    num_bytes, offset_1, offset_2));
	}

	/* Opened for append: drop the offset and issue a plain write(). */
	return (__systemcall(rval, SYS_write + 1024, fd, bufferp, num_bytes));
}
#endif /* !_LP64 */
/*
 * Accessor macros private to s10_getdents_common().  Each one takes a
 * character pointer that addresses the start of a dirent or dirent64 record
 * and yields that record's d_name or d_reclen field, respectively.  They
 * rely on variables named name_offset and reclen_offset being in scope at
 * the point of use, so they shouldn't be used outside of
 * s10_getdents_common().
 */
#define	dirent_name(charptr)	(&(charptr)[name_offset])
#define	dirent_reclen(charptr)	\
	(*(unsigned short *)(uintptr_t)(&(charptr)[reclen_offset]))
/*
 * This function contains code that is common to both s10_getdents() and
 * s10_getdents64().  See the comment above s10_getdents() for details.
 *
 * rval, fd, buf, and nbyte should be passed unmodified from s10_getdents()
 * and s10_getdents64().  getdents_syscall_id should be either SYS_getdents
 * or SYS_getdents64.  name_offset should be the byte offset of
 * the d_name field in the dirent structures passed to the kernel via the
 * syscall represented by getdents_syscall_id.  reclen_offset should be
 * the byte offset of the d_reclen field in the aforementioned dirent
 * structures.
 *
 * Returns 0 on success or an error number on failure.  When the entries
 * are filtered here, rval->sys_rval1 holds the number of bytes copied to
 * buf on success and -1 if the allocation or the copy-out fails.
 */
static int
s10_getdents_common(sysret_t *rval, int fd, char *buf, size_t nbyte,
    int getdents_syscall_id, size_t name_offset, size_t reclen_offset)
{
	int err;
	size_t buf_size;
	char *local_buf;
	char *buf_current;

	/*
	 * Use a special brand operation, B_S10_ISFDXATTRDIR, to determine
	 * whether the specified file descriptor refers to an extended file
	 * attribute directory.  If it doesn't, then SYS_getdents won't
	 * reveal extended file attributes, in which case we can simply
	 * hand the syscall to the native kernel.
	 */
	if ((err = __systemcall(rval, SYS_brand + 1024, B_S10_ISFDXATTRDIR,
	    fd)) != 0)
		return (err);
	if (rval->sys_rval1 == 0)
		return (__systemcall(rval, getdents_syscall_id + 1024, fd, buf,
		    nbyte));

	/*
	 * The file descriptor refers to an extended file attributes directory.
	 * We need to create a dirent buffer that's as large as buf into which
	 * the native SYS_getdents will store the special extended file
	 * attribute directory's entries.  We can't dereference buf because
	 * it might be an invalid pointer!
	 */
	if (nbyte > MAXGETDENTS_SIZE)
		nbyte = MAXGETDENTS_SIZE;
	local_buf = malloc(nbyte);
	if (local_buf == NULL) {
		/*
		 * getdents(2) doesn't return an error code indicating a memory
		 * allocation error and it doesn't make sense to return any of
		 * its documented error codes for a malloc(3C) failure.  We'll
		 * use ENOMEM even though getdents(2) doesn't use it because it
		 * best describes the failure.  The value returned to the
		 * caller must match the one reported through the truss point.
		 */
		(void) S10_TRUSS_POINT_3(rval, getdents_syscall_id, ENOMEM, fd,
		    buf, nbyte);
		rval->sys_rval1 = -1;
		rval->sys_rval2 = 0;
		return (ENOMEM);
	}

	/*
	 * Issue a native SYS_getdents syscall but use our local dirent buffer
	 * instead of buf.  This will allow us to examine the returned dirent
	 * structures immediately and copy them to buf later.  That way the
	 * calling process won't be able to see the dirent structures until
	 * we finish examining them.
	 */
	if ((err = __systemcall(rval, getdents_syscall_id + 1024, fd, local_buf,
	    nbyte)) != 0) {
		free(local_buf);
		return (err);
	}
	buf_size = rval->sys_rval1;
	if (buf_size == 0) {
		/* No entries were returned, so there is nothing to filter. */
		free(local_buf);
		return (0);
	}

	/*
	 * Look for SUNWattr_ro (VIEW_READONLY) and SUNWattr_rw
	 * (VIEW_READWRITE) in the directory entries and remove them
	 * from the dirent buffer.
	 */
	for (buf_current = local_buf;
	    (size_t)(buf_current - local_buf) < buf_size; /* cstyle */) {
		if (strcmp(dirent_name(buf_current), VIEW_READONLY) != 0 &&
		    strcmp(dirent_name(buf_current), VIEW_READWRITE) != 0) {
			/*
			 * The dirent refers to an attribute that should
			 * be visible to Solaris 10 processes.  Keep it
			 * and examine the next entry in the buffer.
			 */
			buf_current += dirent_reclen(buf_current);
		} else {
			/*
			 * We found either SUNWattr_ro (VIEW_READONLY)
			 * or SUNWattr_rw (VIEW_READWRITE).  Remove it
			 * from the dirent buffer by decrementing
			 * buf_size by the size of the entry and
			 * overwriting the entry with the remaining
			 * entries.  buf_current is deliberately left
			 * alone: it now addresses the entry that followed
			 * the removed one.
			 */
			buf_size -= dirent_reclen(buf_current);
			(void) memmove(buf_current, buf_current +
			    dirent_reclen(buf_current), buf_size -
			    (size_t)(buf_current - local_buf));
		}
	}

	/*
	 * Copy local_buf into buf so that the calling process can see
	 * the results.
	 */
	if ((err = s10_uucopy(local_buf, buf, buf_size)) != 0) {
		free(local_buf);
		rval->sys_rval1 = -1;
		rval->sys_rval2 = 0;
		return (err);
	}
	rval->sys_rval1 = buf_size;
	free(local_buf);
	return (0);
}
/*
* Solaris Next added two special extended file attributes, SUNWattr_ro and
* SUNWattr_rw, which are called "extended system attributes". They have
* special semantics (e.g., a process cannot unlink SUNWattr_ro) and should
* not appear in solaris10-branded zones because no Solaris 10 applications,
* including system commands such as tar(1), are coded to correctly handle these
* special attributes.
*
* This emulation function solves the aforementioned problem by emulating
* the getdents(2) syscall and filtering both system attributes out of resulting
* directory entry lists. The emulation function only filters results when
* the given file descriptor refers to an extended file attribute directory.
* Filtering getdents(2) results is expensive because it requires dynamic
* memory allocation; however, the performance cost is tolerable because
* we don't expect Solaris 10 processes to frequently examine extended file
* attribute directories.
*
* The brand's emulation library needs two getdents(2) emulation functions
* because getdents(2) comes in two flavors: non-largefile-aware getdents(2)
* and largefile-aware getdents64(2). s10_getdents() handles the non-largefile-
* aware case for 32-bit processes and all getdents(2) syscalls for 64-bit
* processes (64-bit processes use largefile-aware interfaces by default).
* See s10_getdents64() below for the largefile-aware getdents64(2) emulation
* function for 32-bit processes.
*/
static int
s10_getdents(sysret_t *rval, int fd, struct dirent *buf, size_t nbyte)
{
	/* Field offsets within the non-largefile struct dirent layout. */
	const size_t d_name_off = offsetof(struct dirent, d_name);
	const size_t d_reclen_off = offsetof(struct dirent, d_reclen);

	return (s10_getdents_common(rval, fd, (char *)buf, nbyte, SYS_getdents,
	    d_name_off, d_reclen_off));
}
#ifndef _LP64
/*
* This is the largefile-aware version of getdents(2) for 32-bit processes.
* This exists for the same reason that s10_getdents() exists. See the comment
* above s10_getdents().
*/
static int
s10_getdents64(sysret_t *rval, int fd, struct dirent64 *buf, size_t nbyte)
{
	/* Field offsets within the largefile struct dirent64 layout. */
	const size_t d_name_off = offsetof(struct dirent64, d_name);
	const size_t d_reclen_off = offsetof(struct dirent64, d_reclen);

	return (s10_getdents_common(rval, fd, (char *)buf, nbyte,
	    SYS_getdents64, d_name_off, d_reclen_off));
}
#endif /* !_LP64 */
/*
 * Solaris 10 layout of the acctctl(2) command word: the accounting mode
 * lives in the top four bits and the option in the remaining 28 bits.
 */
#define	S10_AC_PROC		0x10000000
#define	S10_AC_TASK		0x20000000
#define	S10_AC_FLOW		0x40000000
#define	S10_AC_MODE(x)		((x) & (0xfU << 28))
#define	S10_AC_OPTION(x)	((x) & ((0x1 << 28) - 1))
/*
* The mode shift, mode mask and option mask for acctctl have changed. The
* mode is currently the top full byte and the option is the lower 3 full bytes.
*/
int
s10_acctctl(sysret_t *rval, int cmd, void *buf, size_t bufsz)
{
	const int s10_mode = S10_AC_MODE(cmd);
	const int option = S10_AC_OPTION(cmd);
	int native_mode;

	/* Translate the S10 mode bits into the native mode value. */
	if (s10_mode == S10_AC_PROC) {
		native_mode = AC_PROC;
	} else if (s10_mode == S10_AC_TASK) {
		native_mode = AC_TASK;
	} else if (s10_mode == S10_AC_FLOW) {
		native_mode = AC_FLOW;
	} else {
		/* Not a mode we know how to translate. */
		return (S10_TRUSS_POINT_3(rval, SYS_acctctl, EINVAL, cmd, buf,
		    bufsz));
	}

	/* Recombine the native mode with the unchanged option bits. */
	return (__systemcall(rval, SYS_acctctl + 1024, native_mode | option,
	    buf, bufsz));
}
/*
* The Audit Policy parameters have changed due to:
* 6466722 audituser and AUDIT_USER are defined, unused, undocumented and
* should be removed.
*
* In S10 we had the following flag:
* #define AUDIT_USER 0x0040
* which doesn't exist in Solaris Next where the subsequent flags are shifted
* down. For example, in S10 we had:
* #define AUDIT_GROUP 0x0080
* but on Solaris Next we have:
* #define AUDIT_GROUP 0x0040
* AUDIT_GROUP has the value AUDIT_USER had in S10 and all of the subsequent
* bits are also shifted one place.
*
* When we're getting or setting the Audit Policy parameters we need to
* shift the outgoing or incoming bits into their proper positions. Since
* S10_AUDIT_USER was always unused, we always clear that bit on A_GETPOLICY.
*
* The command we care about, BSM_AUDITCTL, passes the most parameters (3),
* so declare this function to take up to 4 args and just pass them on.
* The number of parameters for s10_auditsys needs to be equal to the BSM_*
* subcommand that has the most parameters, since we want to pass all
* parameters through, regardless of which subcommands we interpose on.
*
* Note that the auditsys system call uses the SYSENT_AP macro wrapper instead
* of the more common SYSENT_CI macro. This means the return value is a
* SE_64RVAL so the syscall table uses RV_64RVAL.
*/
/* Policy bits at and above the old S10 AUDIT_USER position (0x40). */
#define	S10_AUDIT_HMASK	0xffffffc0
/* Policy bits below 0x40; these are the same in both encodings. */
#define	S10_AUDIT_LMASK	0x3f

int
s10_auditsys(sysret_t *rval, int bsmcmd, intptr_t a0, intptr_t a1, intptr_t a2)
{
	uint_t policy;
	int rc;

	if (bsmcmd == BSM_AUDITCTL) {
		switch ((int)a0) {
		case A_GETPOLICY:
			/* Read the native policy into a local word first. */
			rc = __systemcall(rval, SYS_auditsys + 1024, bsmcmd,
			    a0, &policy, a2);
			if (rc != 0)
				return (rc);

			/*
			 * Native -> S10: move the high bits up one place,
			 * leaving the S10 AUDIT_USER bit (0x40) clear.
			 */
			policy = (policy & S10_AUDIT_LMASK) |
			    ((policy & S10_AUDIT_HMASK) << 1);
			if (s10_uucopy(&policy, (void *)a1,
			    sizeof (policy)) != 0)
				return (EFAULT);
			return (0);

		case A_SETPOLICY:
			if (s10_uucopy((const void *)a1, &policy,
			    sizeof (policy)) != 0)
				return (EFAULT);

			/*
			 * S10 -> native: move the high bits down one place,
			 * which discards the S10 AUDIT_USER bit.
			 */
			policy = (policy & S10_AUDIT_LMASK) |
			    ((policy >> 1) & S10_AUDIT_HMASK);
			return (__systemcall(rval, SYS_auditsys + 1024, bsmcmd,
			    a0, &policy, a2));

		default:
			break;
		}
	}

	/* Every other command and subcommand passes straight through. */
	return (__systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0, a1, a2));
}
/*
* Determine whether the executable passed to SYS_exec or SYS_execve is a
* native executable. The s10_npreload.so invokes the B_S10_NATIVE brand
* operation which patches up the process's exec info to eliminate any trace
* of the wrapper. That will make pgrep and other commands that examine
* process' executable names and command-line parameters work properly.
*/
static int
s10_exec_native(sysret_t *rval, const char *fname, const char **argp,
    const char **envp)
{
	/* Path of the wrapper that marks an exec of a native binary. */
	static const char native_wrapper[] =
	    "/.SUNWnative/usr/lib/brand/solaris10/s10_native";
	char exe_path[64];
	const char *linker = fname;
	const char **native_argv;
	int rc;

	/*
	 * Take a local copy of the pathname being exec'ed.  The copy's
	 * result is deliberately ignored: exe_path starts out empty, and
	 * the comparison below is bounded by the size of exe_path.
	 */
	exe_path[0] = '\0';
	(void) s10_uucopystr(fname, exe_path, sizeof (exe_path));

	/* Anything other than the native wrapper is not our business. */
	if (strncmp(exe_path, native_wrapper, sizeof (exe_path)) != 0)
		return (0);

	/*
	 * Skip the first element of the argv array.  The element after it
	 * holds the path of the dynamic linker, which is what actually gets
	 * exec'ed.
	 */
	native_argv = argp + 1;
	if (s10_uucopy(native_argv, &linker, sizeof (char *)) != 0)
		return (EFAULT);

	/* If an exec call succeeds, it never returns */
	rc = __systemcall(rval, SYS_brand + 1024, B_EXEC_NATIVE, linker,
	    native_argv, envp, NULL, NULL, NULL);
	s10_assert(rc != 0);
	return (rc);
}
/*
* Interpose on the SYS_exec syscall to detect native wrappers.
*/
int
s10_exec(sysret_t *rval, const char *fname, const char **argp)
{
	/* A non-zero result means the native-wrapper path was taken. */
	int rc = s10_exec_native(rval, fname, argp, NULL);

	if (rc == 0) {
		/* If an exec call succeeds, it never returns */
		rc = __systemcall(rval, SYS_execve + 1024, fname, argp, NULL);
		s10_assert(rc != 0);
	}
	return (rc);
}
/*
* Interpose on the SYS_execve syscall to detect native wrappers.
*/
int
s10_execve(sysret_t *rval, const char *fname, const char **argp,
    const char **envp)
{
	/* A non-zero result means the native-wrapper path was taken. */
	int rc = s10_exec_native(rval, fname, argp, envp);

	if (rc == 0) {
		/* If an exec call succeeds, it never returns */
		rc = __systemcall(rval, SYS_execve + 1024, fname, argp, envp);
		s10_assert(rc != 0);
	}
	return (rc);
}
/*
* S10's issetugid() syscall is now a subcode to privsys().
*/
static int
s10_issetugid(sysret_t *rval)
{
	int rc;

	/* Issue privsys() with the ISSETUGID subcode; other args are zero. */
	rc = __systemcall(rval, SYS_privsys + 1024, PRIVSYS_ISSETUGID,
	    0, 0, 0, 0, 0);
	return (rc);
}
/*
* New last arg "block" flag should be zero. The block flag is used by
* the Opensolaris AIO implementation, which is now part of libc.
*/
static int
s10_sigqueue(sysret_t *rval, pid_t pid, int signo, void *value, int si_code)
{
	int rc;

	/* Forward the four S10 arguments and append a zero "block" flag. */
	rc = __systemcall(rval, SYS_sigqueue + 1024, pid, signo, value,
	    si_code, 0);
	return (rc);
}
/*
 * uname(2) emulation.  Fetch the native utsname, replace its release and
 * version strings with S10_UTS_RELEASE and S10_UTS_VERSION, and copy the
 * result out to the caller's buffer (p1).
 */
static long
s10_uname(sysret_t *rv, uintptr_t p1)
{
	struct utsname *const user_un = (struct utsname *)p1;
	struct utsname native_un;
	int native_rev;
	int rc;

	rc = __systemcall(rv, SYS_uname + 1024, &native_un);
	if (rc != 0)
		return (rc);

	/*
	 * Parse the number that starts at the third character of the native
	 * release string and insist that it is at least 11.
	 */
	native_rev = atoi(&native_un.release[2]);
	s10_assert(native_rev >= 11);

	/* Clear each field completely before storing the S10 string. */
	bzero(native_un.release, _SYS_NMLN);
	(void) strlcpy(native_un.release, S10_UTS_RELEASE, _SYS_NMLN);
	bzero(native_un.version, _SYS_NMLN);
	(void) strlcpy(native_un.version, S10_UTS_VERSION, _SYS_NMLN);

	/* copy out the modified uname info */
	return (s10_uucopy(&native_un, user_un, sizeof (native_un)));
}
/*
 * sysinfo(2) emulation.  SI_RELEASE and SI_VERSION are answered here with
 * the S10 release and version strings; every other command is passed to
 * the native sysinfo(2) syscall.
 */
int
s10_sysinfo(sysret_t *rv, int command, char *buf, long count)
{
	char *value;
	int len;

	if (command == SI_RELEASE) {
		value = S10_UTS_RELEASE;
	} else if (command == SI_VERSION) {
		value = S10_UTS_VERSION;
	} else {
		/* Not a command we interpose on: use the native syscall. */
		return (__systemcall(rv, SYS_systeminfo + 1024,
		    command, buf, count));
	}

	/* Size of the value including its terminating NULL byte. */
	len = strlen(value) + 1;

	if (count > 0) {
		if (s10_uucopystr(value, buf, count) != 0)
			return (EFAULT);

		/*
		 * s10_uucopystr() doesn't terminate buf, so when the value
		 * doesn't fit, store the terminator in buf's last byte.
		 */
		if (len > count) {
			if (s10_uucopy("\0", &buf[count - 1], 1) != 0)
				return (EFAULT);
		}
	}

	/*
	 * On success, sysinfo(2) returns the size of buffer required to hold
	 * the complete value plus its terminating NULL byte.
	 */
	(void) S10_TRUSS_POINT_3(rv, SYS_systeminfo, 0, command, buf, count);
	rv->sys_rval1 = len;
	rv->sys_rval2 = 0;
	return (0);
}
#if defined(__x86)
#if defined(__amd64)
/*
* 64-bit x86 LWPs created by SYS_lwp_create start here if they need to set
* their %fs registers to the legacy Solaris 10 selector value.
*
* This function does three things:
*
* 1. Trap to the kernel so that it can set %fs to the legacy Solaris 10
* selector value.
* 2. Read the LWP's true entry point (the entry point supplied by libc
* when SYS_lwp_create was invoked) from %r14.
* 3. Eliminate this function's stack frame and pass control to the LWP's
* true entry point.
*
* See the comment above s10_lwp_create_correct_fs() (see below) for the reason
* why this function exists.
*/
/*
* ulwp_structp is not interpreted here; it is only loaded into %rdi so that
* the true entry point receives it as its first argument.
*
* NOTE(review): the assembly below restores %rsp from %rbp and then pops
* %rbp, so it presumably relies on the compiler having set up a conventional
* %rbp-based frame for this function -- confirm that the build never omits
* the frame pointer for this file.
*/
/*ARGSUSED*/
static void
s10_lwp_create_entry_point(void *ulwp_structp)
{
sysret_t rval;
/*
* The new LWP's %fs register is initially zero, but libc won't
* function correctly when %fs is zero. Change the LWP's %fs register
* via SYS_brand. The result of the brand operation is ignored.
*/
(void) __systemcall(&rval, SYS_brand + 1024, B_S10_FSREGCORRECTION);
/*
* Jump to the true entry point, which is stored in %r14.
* Remove our stack frame before jumping so that
* s10_lwp_create_entry_point() won't be seen in stack traces.
*
* NOTE: s10_lwp_create_entry_point() pushes %r12 onto its stack frame
* so that it can use it as a temporary register. We don't restore %r12
* in this assembly block because we don't care about its value (and
* neither does _lwp_start()). Besides, the System V ABI AMD64
* Architecture Processor Supplement doesn't specify that %r12 should
* have a special value when LWPs start, so we can ignore its value when
* we jump to the true entry point. Furthermore, %r12 is a callee-saved
* register, so the true entry point should push %r12 onto its stack
* before using the register. We ignore %r14 after we read it for
* similar reasons.
*
* NOTE: The compiler will generate a function epilogue for this
* function despite the fact that the LWP will never execute it.
* We could hand-code this entire function in assembly to eliminate
* the epilogue, but the epilogue is only three or four instructions,
* so we wouldn't save much space. Besides, why would we want
* to create yet another ugly, hard-to-maintain assembly function when
* we could write most of it in C?
*/
__asm__ __volatile__(
"movq %0, %%rdi\n\t" /* pass ulwp_structp as arg1 */
"movq %%rbp, %%rsp\n\t" /* eliminate the stack frame */
"popq %%rbp\n\t"
"jmp *%%r14\n\t" /* jump to the true entry point */
: : "r" (ulwp_structp));
/*NOTREACHED*/
}
/*
* The S10 libc expects that %fs will be nonzero for new 64-bit x86 LWPs but the
* Nevada kernel clears %fs for such LWPs. Unfortunately, new LWPs do not issue
* SYS_lwp_private (see s10_lwp_private() below) after they are created, so
* we must ensure that new LWPs invoke a brand operation that sets %fs to a
* nonzero value immediately after their creation.
*
* The easiest way to do this is to make new LWPs start at a special function,
* s10_lwp_create_entry_point() (see its definition above), that invokes the
* brand operation that corrects %fs. We'll store the entry points of new LWPs
* in their %r14 registers so that s10_lwp_create_entry_point() can find and
* call them after invoking the special brand operation. %r14 is a callee-saved
* register; therefore, any functions invoked by s10_lwp_create_entry_point()
* and all functions dealing with signals (e.g., sigacthandler()) will preserve
* %r14 for s10_lwp_create_entry_point().
*
* The Nevada kernel can safely work with nonzero %fs values because the kernel
* configures per-thread %fs segment descriptors so that the legacy %fs selector
* value will still work. See the comment in lwp_load() regarding %fs and
* %fsbase in 64-bit x86 processes.
*
* This emulation exists thanks to CRs 6467491 and 6501650.
*/
static int
s10_lwp_create_correct_fs(sysret_t *rval, ucontext_t *ucp, int flags,
    id_t *new_lwp)
{
	ucontext_t patched_uc;
	greg_t *regs;

	/* Work on a private copy of the caller's ucontext_t. */
	if (s10_uucopy(ucp, &patched_uc, sizeof (patched_uc)) != 0)
		return (EFAULT);

	/*
	 * Stash the new LWP's real entry point (its saved %rip) in its
	 * %r14 register, then make s10_lwp_create_entry_point() the
	 * address at which the LWP starts running.
	 */
	regs = patched_uc.uc_mcontext.gregs;
	regs[REG_R14] = regs[REG_RIP];
	regs[REG_RIP] = (greg_t)s10_lwp_create_entry_point;

	/*
	 * Create the LWP from the modified context so that it starts at
	 * s10_lwp_create_entry_point().
	 */
	return (__systemcall(rval, SYS_lwp_create + 1024, &patched_uc,
	    flags, new_lwp));
}
#endif /* __amd64 */
/*
* This function is invoked on x86 systems when SYS_lwp_create is issued but no
* %fs register correction is necessary.
*
* See the comment above s10_lwp_create_correct_fs() for more details.
*/
static int
s10_lwp_create(sysret_t *rval, ucontext_t *ucp, int flags, id_t *new_lwp)
{
	int rc;

	/* Nothing to fix up: pass the arguments through untouched. */
	rc = __systemcall(rval, SYS_lwp_create + 1024, ucp, flags, new_lwp);
	return (rc);
}
/*
* SYS_lwp_private is issued by libc_init() to set %fsbase in 64-bit x86
* processes. The Nevada kernel sets %fs to zero but the S10 libc expects
* %fs to be nonzero. We'll pass the issued system call to the kernel untouched
* and invoke a brand operation to set %fs to the legacy S10 selector value.
*
* This emulation exists thanks to CRs 6467491 and 6501650.
*/
static int
s10_lwp_private(sysret_t *rval, int cmd, int which, uintptr_t base)
{
#if defined(__amd64)
	int rc;

	/* The native syscall is always issued first, unmodified. */
	rc = __systemcall(rval, SYS_lwp_private + 1024, cmd, which, base);
	if (rc != 0)
		return (rc);

	/*
	 * The current LWP's %fs register should be zero at this point.
	 * thr_main() is used as a probe: if it doesn't return -1, the
	 * Solaris 10 libc we're working with copes with a zero %fs and
	 * nothing more needs to be done.
	 */
	if (thr_main() != -1)
		return (0);

	/*
	 * thr_main() barfed, so this libc needs a nonzero %fs.  Patch
	 * s10_sysent_table so that SYS_lwp_create executes
	 * s10_lwp_create_correct_fs() rather than the default
	 * s10_lwp_create(); that guarantees that new LWPs will have correct
	 * %fs values.
	 *
	 * SYS_lwp_private is only issued by libc_init(), which is executed
	 * when libc is first loaded by ld.so.1, so we are guaranteed to be
	 * single-threaded here.  Even if we were multithreaded, writing a
	 * 64-bit value to the st_callc field of a s10_sysent_table entry is
	 * guaranteed to be atomic on 64-bit x86 chips as long as the field
	 * is not split across cache lines (it shouldn't be).  See chapter 8,
	 * section 1.1 of "The Intel 64 and IA32 Architectures Software
	 * Developer's Manual," Volume 3A for more details.
	 */
	s10_sysent_table[SYS_lwp_create].st_callc =
	    (sysent_cb_t)s10_lwp_create_correct_fs;

	/* Finally, correct the current LWP's %fs via SYS_brand. */
	return (__systemcall(rval, SYS_brand + 1024, B_S10_FSREGCORRECTION));
#else	/* !__amd64 */
	return (__systemcall(rval, SYS_lwp_private + 1024, cmd, which, base));
#endif	/* !__amd64 */
}
#endif /* __x86 */
/*
* The Opensolaris versions of lwp_mutex_timedlock() and lwp_mutex_trylock()
* add an extra argument to the interfaces, a uintptr_t value for the mutex's
* mutex_owner field. The Solaris 10 libc assigns the mutex_owner field at
* user-level, so we just make the extra argument be zero in both syscalls.
*/
static int
s10_lwp_mutex_timedlock(sysret_t *rval, lwp_mutex_t *lp, timespec_t *tsp)
{
	int rc;

	/* The trailing zero fills the extra mutex_owner argument. */
	rc = __systemcall(rval, SYS_lwp_mutex_timedlock + 1024, lp, tsp, 0);
	return (rc);
}
static int
s10_lwp_mutex_trylock(sysret_t *rval, lwp_mutex_t *lp)
{
	int rc;

	/* The trailing zero fills the extra mutex_owner argument. */
	rc = __systemcall(rval, SYS_lwp_mutex_trylock + 1024, lp, 0);
	return (rc);
}
/*
* If the emul_global_zone flag is set then emulate some aspects of the
* zone system call. In particular, emulate the global zone ID on the
* ZONE_LOOKUP subcommand and emulate some of the global zone attributes
* on the ZONE_GETATTR subcommand. If the flag is not set or we're performing
* some other operation, simply pass the calls through.
*/
int
s10_zone(sysret_t *rval, int cmd, void *arg1, void *arg2, void *arg3,
    void *arg4)
{
	char *value;
	int len;
	zoneid_t zid;
	int attr;
	char *buf;
	size_t bufsize;

	/* Without global-zone emulation every request goes to the kernel. */
	if (!emul_global_zone)
		goto native;

	if (cmd == ZONE_LOOKUP) {
		/* Answer every lookup with the global zone's ID. */
		(void) S10_TRUSS_POINT_1(rval, SYS_zone, 0, cmd);
		rval->sys_rval1 = GLOBAL_ZONEID;
		rval->sys_rval2 = 0;
		return (0);
	}

	/* ZONE_GETATTR is the only other subcommand we emulate. */
	if (cmd != ZONE_GETATTR)
		goto native;

	zid = (zoneid_t)(uintptr_t)arg1;
	attr = (int)(uintptr_t)arg2;
	buf = (char *)arg3;
	bufsize = (size_t)arg4;

	/* Only requests that name the global zone are emulated. */
	if (zid != GLOBAL_ZONEID)
		goto native;

	if (attr == ZONE_ATTR_NAME) {
		value = GLOBAL_ZONENAME;
	} else if (attr == ZONE_ATTR_BRAND) {
		value = NATIVE_BRAND_NAME;
	} else {
		/*
		 * Any other attribute is fetched from the kernel, but for
		 * the zone we are really running in.
		 */
		arg1 = (void *)(uintptr_t)zoneid;
		goto native;
	}

	(void) S10_TRUSS_POINT_5(rval, SYS_zone, 0, cmd, zid, attr,
	    buf, bufsize);

	/* The reported length counts the terminating NULL byte. */
	len = strlen(value) + 1;
	if (len > bufsize)
		return (ENAMETOOLONG);

	/* A NULL buf means the caller only wants the length. */
	if (buf != NULL) {
		if (len == 1) {
			/* Empty value: just store the terminator. */
			if (s10_uucopy("\0", buf, 1) != 0)
				return (EFAULT);
		} else {
			if (s10_uucopystr(value, buf, len) != 0)
				return (EFAULT);

			/*
			 * s10_uucopystr() does NOT terminate "buf", so
			 * store the terminating byte ourselves.
			 */
			if (s10_uucopy("\0", &buf[len - 1], 1) != 0)
				return (EFAULT);
		}
	}

	rval->sys_rval1 = len;
	rval->sys_rval2 = 0;
	return (0);

native:
	return (__systemcall(rval, SYS_zone + 1024, cmd, arg1, arg2, arg3,
	    arg4));
}
/*
* Close a libc file handle, but don't actually close the underlying
* file descriptor.
*/
static void
s10_close_fh(FILE *file)
{
	int orig_fd, saved_fd;

	/* Nothing to do without a stream or a usable descriptor. */
	if (file == NULL || (orig_fd = fileno(file)) < 0)
		return;

	/* Keep the open file alive across the fclose() below. */
	if ((saved_fd = dup(orig_fd)) == -1)
		return;

	/*
	 * fclose() also closes orig_fd, so put the duplicate back on the
	 * original descriptor number and then drop the spare.
	 */
	(void) fclose(file);
	(void) dup2(saved_fd, orig_fd);
	(void) close(saved_fd);
}
/*
* Initialize the brand emulation library for the current process and then
* transfer control to the branded executable.
*
* In order, this function: sanity checks s10_sysent_table, detaches libc
* stdio from the process's file descriptors, caches init/zone information
* (zoneid and the zone's emulation bitmap), decides whether to emulate the
* global zone based on the basename of argv[0], registers the emulation
* handler with the kernel, fetches the executable's ELF data, rewrites the
* aux vector that follows envp on the stack, and finally calls s10_runexe().
*
* argc is unused. The function only returns (with -1) if both s10_runexe()
* and the s10_abort() call that follows it return.
*/
/*ARGSUSED*/
int
s10_init(int argc, char *argv[], char *envp[])
{
sysret_t rval;
s10_brand_reg_t reg;
s10_elf_data_t sed;
auxv_t *ap;
uintptr_t *p;
int i, err;
char *bname;
/* Sanity check our translation table return value codes */
for (i = 0; i < NSYSCALL; i++) {
s10_sysent_table_t *est = &(s10_sysent_table[i]);
s10_assert(BIT_ONLYONESET(est->st_args & RV_MASK));
}
/*
* We need to shutdown all libc stdio. libc stdio normally goes to
* file descriptors, but since we're actually part of another
* process we don't own these file descriptors and we can't make
* any assumptions about their state.
*/
s10_close_fh(stdin);
s10_close_fh(stdout);
s10_close_fh(stderr);
/*
* Cache the pid of the zone's init process and determine if
* we're init(1m) for the zone. Remember: we might be init
* now, but as soon as we fork(2) we won't be.
*/
(void) get_initpid_info();
/*
* get the current zoneid
*
* Note that the zone syscalls here are issued without adding 1024 to
* the syscall number; see the comment above the B_REGISTER call below.
*/
err = __systemcall(&rval, SYS_zone, ZONE_LOOKUP, NULL);
s10_assert(err == 0);
zoneid = (zoneid_t)rval.sys_rval1;
/* Get the zone's emulation bitmap. */
if ((err = __systemcall(&rval, SYS_zone, ZONE_GETATTR, zoneid,
S10_EMUL_BITMAP, emul_bitmap, sizeof (emul_bitmap))) != 0) {
s10_abort(err, "The zone's patch level is unsupported");
/*NOTREACHED*/
}
bname = basename(argv[0]);
/*
* In general we want the S10 commands that are zone-aware to continue
* to behave as they normally do within a zone. Since these commands
* are zone-aware, they should continue to "do the right thing".
* However, some zone-aware commands aren't going to work the way
* we expect them to inside the branded zone. In particular, the pkg
* and patch commands will not properly manage all pkgs/patches
* unless the commands think they are running in the global zone. For
* these commands we want to emulate the global zone.
*
* We don't do any emulation for pkgcond since it is typically used
* in pkg/patch postinstall scripts and we want those scripts to do
* the right thing inside a zone.
*
* One issue is the handling of hollow pkgs. Since the pkgs are
* hollow, they won't use pkgcond in their postinstall scripts. These
* pkgs typically are installing drivers so we handle that by
* replacing add_drv and rem_drv in the s10_boot script.
*/
if (strcmp("pkgadd", bname) == 0 || strcmp("pkgrm", bname) == 0 ||
strcmp("patchadd", bname) == 0 || strcmp("patchrm", bname) == 0)
emul_global_zone = B_TRUE;
/*
* Register our syscall emulation table with the kernel.
* Note that we don't have to invoke (syscall_number + 1024)
* until we've actually established a syscall emulation callback
* handler address, which is what we're doing with this brand
* syscall.
*/
reg.sbr_version = S10_VERSION;
#ifdef __x86
reg.sbr_handler = (caddr_t)s10_handler_table;
#else /* !__x86 */
reg.sbr_handler = (caddr_t)s10_handler;
#endif /* !__x86 */
if ((err = __systemcall(&rval, SYS_brand, B_REGISTER, &reg)) != 0) {
s10_abort(err, "Failed to brand current process");
/*NOTREACHED*/
}
/* Get data about the executable we're running from the kernel. */
if ((err = __systemcall(&rval, SYS_brand + 1024,
B_ELFDATA, (void *)&sed)) != 0) {
s10_abort(err,
"Failed to get required brand ELF data from the kernel");
/*NOTREACHED*/
}
/*
* Find the aux vector on the stack.
*/
p = (uintptr_t *)envp;
while (*p != NULL)
p++;
/*
* p is now pointing at the 0 word after the environ pointers.
* After that is the aux vectors.
*
* The aux vectors are currently pointing to the brand emulation
* library and associated linker. We're going to change them to
* point to the brand executable and associated linker (or to no
* linker for static binaries). This matches the process data
* stored within the kernel and visible from /proc, which was
* all setup in s10_elfexec(). We do this so that when a debugger
* attaches to the process it sees the process as a normal solaris
* process, this brand emulation library and everything on its
* link map will not be visible, unless our librtld_db plugin
* is used. Note that this is very different from how Linux
* branded processes are implemented within lx branded zones.
* In that situation, the primary linkmap of the process is the
* brand emulation library's linkmap, not the Linux application's
* linkmap.
*
* We also need to clear the AF_SUN_NOPLM flag from the AT_SUN_AUXFLAGS
* aux vector. This flag told our linker that we don't have a
* primary link map. Now that our linker is done initializing, we
* want to clear this flag before we transfer control to the
* application's copy of the linker, since we want that linker to have
* a primary link map which will be the link map for the application
* we're running.
*/
p++;
for (ap = (auxv_t *)p; ap->a_type != AT_NULL; ap++) {
switch (ap->a_type) {
case AT_BASE:
/* Hide AT_BASE if static binary */
if (sed.sed_base == NULL) {
ap->a_type = AT_IGNORE;
ap->a_un.a_val = NULL;
} else {
ap->a_un.a_val = sed.sed_base;
}
break;
case AT_ENTRY:
ap->a_un.a_val = sed.sed_entry;
break;
case AT_PHDR:
ap->a_un.a_val = sed.sed_phdr;
break;
case AT_PHENT:
ap->a_un.a_val = sed.sed_phent;
break;
case AT_PHNUM:
ap->a_un.a_val = sed.sed_phnum;
break;
case AT_SUN_AUXFLAGS:
ap->a_un.a_val &= ~AF_SUN_NOPLM;
break;
case AT_SUN_EMULATOR:
/*
* ld.so.1 inspects AT_SUN_EMULATOR to see
* if it is the linker for the brand emulation
* library. Hide AT_SUN_EMULATOR, as the
* linker we are about to jump to is the linker
* for the binary.
*/
ap->a_type = AT_IGNORE;
ap->a_un.a_val = NULL;
break;
case AT_SUN_LDDATA:
/* Hide AT_SUN_LDDATA if static binary */
if (sed.sed_lddata == NULL) {
ap->a_type = AT_IGNORE;
ap->a_un.a_val = NULL;
} else {
ap->a_un.a_val = sed.sed_lddata;
}
break;
default:
break;
}
}
/* Hand control to the linker entry point reported by the kernel. */
s10_runexe(argv, sed.sed_ldentry);
/*NOTREACHED*/
s10_abort(0, "s10_runexe() returned");
return (-1);
}
/*
* This table must have at least NSYSCALL entries in it.
*
* The second parameter of each entry in the s10_sysent_table
* contains the number of parameters and flags that describe the
* syscall return value encoding. See the block comments at the
* top of this file for more information about the syscall return
* value flags and when they should be used.
*/
s10_sysent_table_t s10_sysent_table[] = {
#if defined(__sparc) && !defined(__sparcv9)
EMULATE(s10_indir, 9 | RV_64RVAL), /* 0 */
#else
NOSYS, /* 0 */
#endif
NOSYS, /* 1 */
EMULATE(s10_forkall, 0 | RV_32RVAL2), /* 2 */
NOSYS, /* 3 */
NOSYS, /* 4 */
EMULATE(s10_open, 3 | RV_DEFAULT), /* 5 */
NOSYS, /* 6 */
EMULATE(s10_wait, 0 | RV_32RVAL2), /* 7 */
EMULATE(s10_creat, 2 | RV_DEFAULT), /* 8 */
NOSYS, /* 9 */
EMULATE(s10_unlink, 1 | RV_DEFAULT), /* 10 */
EMULATE(s10_exec, 2 | RV_DEFAULT), /* 11 */
NOSYS, /* 12 */
NOSYS, /* 13 */
NOSYS, /* 14 */
NOSYS, /* 15 */
EMULATE(s10_chown, 3 | RV_DEFAULT), /* 16 */
NOSYS, /* 17 */
EMULATE(s10_stat, 2 | RV_DEFAULT), /* 18 */
NOSYS, /* 19 */
NOSYS, /* 20 */
NOSYS, /* 21 */
EMULATE(s10_umount, 1 | RV_DEFAULT), /* 22 */
NOSYS, /* 23 */
NOSYS, /* 24 */
NOSYS, /* 25 */
NOSYS, /* 26 */
NOSYS, /* 27 */
EMULATE(s10_fstat, 2 | RV_DEFAULT), /* 28 */
NOSYS, /* 29 */
EMULATE(s10_utime, 2 | RV_DEFAULT), /* 30 */
NOSYS, /* 31 */
NOSYS, /* 32 */
EMULATE(s10_access, 2 | RV_DEFAULT), /* 33 */
NOSYS, /* 34 */
NOSYS, /* 35 */
NOSYS, /* 36 */
NOSYS, /* 37 */
NOSYS, /* 38 */
NOSYS, /* 39 */
NOSYS, /* 40 */
EMULATE(s10_dup, 1 | RV_DEFAULT), /* 41 */
NOSYS, /* 42 */
NOSYS, /* 43 */
NOSYS, /* 44 */
NOSYS, /* 45 */
NOSYS, /* 46 */
NOSYS, /* 47 */
NOSYS, /* 48 */
NOSYS, /* 49 */
NOSYS, /* 50 */
NOSYS, /* 51 */
NOSYS, /* 52 */
NOSYS, /* 53 */
EMULATE(s10_ioctl, 3 | RV_DEFAULT), /* 54 */
NOSYS, /* 55 */
NOSYS, /* 56 */
NOSYS, /* 57 */
NOSYS, /* 58 */
EMULATE(s10_execve, 3 | RV_DEFAULT), /* 59 */
NOSYS, /* 60 */
NOSYS, /* 61 */
NOSYS, /* 62 */
NOSYS, /* 63 */
NOSYS, /* 64 */
NOSYS, /* 65 */
NOSYS, /* 66 */
NOSYS, /* 67 */
NOSYS, /* 68 */
NOSYS, /* 69 */
NOSYS, /* 70 */
EMULATE(s10_acctctl, 3 | RV_DEFAULT), /* 71 */
NOSYS, /* 72 */
NOSYS, /* 73 */
NOSYS, /* 74 */
EMULATE(s10_issetugid, 0 | RV_DEFAULT), /* 75 */
EMULATE(s10_fsat, 6 | RV_DEFAULT), /* 76 */
NOSYS, /* 77 */
NOSYS, /* 78 */
EMULATE(s10_rmdir, 1 | RV_DEFAULT), /* 79 */
NOSYS, /* 80 */
EMULATE(s10_getdents, 3 | RV_DEFAULT), /* 81 */
NOSYS, /* 82 */
NOSYS, /* 83 */
NOSYS, /* 84 */
NOSYS, /* 85 */
NOSYS, /* 86 */
EMULATE(s10_poll, 3 | RV_DEFAULT), /* 87 */
EMULATE(s10_lstat, 2 | RV_DEFAULT), /* 88 */
NOSYS, /* 89 */
NOSYS, /* 90 */
NOSYS, /* 91 */
NOSYS, /* 92 */
NOSYS, /* 93 */
EMULATE(s10_fchown, 3 | RV_DEFAULT), /* 94 */
NOSYS, /* 95 */
NOSYS, /* 96 */
NOSYS, /* 97 */
NOSYS, /* 98 */
NOSYS, /* 99 */
NOSYS, /* 100 */
NOSYS, /* 101 */
NOSYS, /* 102 */
NOSYS, /* 103 */
NOSYS, /* 104 */
NOSYS, /* 105 */
NOSYS, /* 106 */
NOSYS, /* 107 */
NOSYS, /* 108 */
NOSYS, /* 109 */
NOSYS, /* 110 */
NOSYS, /* 111 */
NOSYS, /* 112 */
NOSYS, /* 113 */
NOSYS, /* 114 */
NOSYS, /* 115 */
NOSYS, /* 116 */
NOSYS, /* 117 */
NOSYS, /* 118 */
NOSYS, /* 119 */
NOSYS, /* 120 */
NOSYS, /* 121 */
NOSYS, /* 122 */
#if defined(__x86)
EMULATE(s10_xstat, 3 | RV_DEFAULT), /* 123 */
EMULATE(s10_lxstat, 3 | RV_DEFAULT), /* 124 */
EMULATE(s10_fxstat, 3 | RV_DEFAULT), /* 125 */
EMULATE(s10_xmknod, 4 | RV_DEFAULT), /* 126 */
#else
NOSYS, /* 123 */
NOSYS, /* 124 */
NOSYS, /* 125 */
NOSYS, /* 126 */
#endif
NOSYS, /* 127 */
NOSYS, /* 128 */
NOSYS, /* 129 */
EMULATE(s10_lchown, 3 | RV_DEFAULT), /* 130 */
NOSYS, /* 131 */
NOSYS, /* 132 */
NOSYS, /* 133 */
EMULATE(s10_rename, 2 | RV_DEFAULT), /* 134 */
EMULATE(s10_uname, 1 | RV_DEFAULT), /* 135 */
NOSYS, /* 136 */
NOSYS, /* 137 */
NOSYS, /* 138 */
EMULATE(s10_sysinfo, 3 | RV_DEFAULT), /* 139 */
NOSYS, /* 140 */
NOSYS, /* 141 */
NOSYS, /* 142 */
EMULATE(s10_fork1, 0 | RV_32RVAL2), /* 143 */
NOSYS, /* 144 */
NOSYS, /* 145 */
NOSYS, /* 146 */
EMULATE(s10_lwp_sema_wait, 1 | RV_DEFAULT), /* 147 */
NOSYS, /* 148 */
NOSYS, /* 149 */
NOSYS, /* 150 */
NOSYS, /* 151 */
NOSYS, /* 152 */
NOSYS, /* 153 */
EMULATE(s10_utimes, 2 | RV_DEFAULT), /* 154 */
NOSYS, /* 155 */
NOSYS, /* 156 */
NOSYS, /* 157 */
NOSYS, /* 158 */
#ifdef __x86
EMULATE(s10_lwp_create, 3 | RV_DEFAULT), /* 159 */
#else /* !__x86 */
NOSYS, /* 159 */
#endif /* !__x86 */
NOSYS, /* 160 */
NOSYS, /* 161 */
NOSYS, /* 162 */
NOSYS, /* 163 */
NOSYS, /* 164 */
NOSYS, /* 165 */
#if defined(__x86)
EMULATE(s10_lwp_private, 3 | RV_DEFAULT), /* 166 */
#else
NOSYS, /* 166 */
#endif
NOSYS, /* 167 */
NOSYS, /* 168 */
EMULATE(s10_lwp_mutex_lock, 1 | RV_DEFAULT), /* 169 */
NOSYS, /* 170 */
NOSYS, /* 171 */
NOSYS, /* 172 */
NOSYS, /* 173 */
EMULATE(s10_pwrite, 4 | RV_DEFAULT), /* 174 */
NOSYS, /* 175 */
NOSYS, /* 176 */
NOSYS, /* 177 */
NOSYS, /* 178 */
NOSYS, /* 179 */
NOSYS, /* 180 */
NOSYS, /* 181 */
NOSYS, /* 182 */
NOSYS, /* 183 */
NOSYS, /* 184 */
NOSYS, /* 185 */
EMULATE(s10_auditsys, 4 | RV_64RVAL), /* 186 */
NOSYS, /* 187 */
NOSYS, /* 188 */
NOSYS, /* 189 */
EMULATE(s10_sigqueue, 4 | RV_DEFAULT), /* 190 */
NOSYS, /* 191 */
NOSYS, /* 192 */
NOSYS, /* 193 */
NOSYS, /* 194 */
NOSYS, /* 195 */
NOSYS, /* 196 */
NOSYS, /* 197 */
NOSYS, /* 198 */
NOSYS, /* 199 */
NOSYS, /* 200 */
NOSYS, /* 201 */
NOSYS, /* 202 */
NOSYS, /* 203 */
NOSYS, /* 204 */
NOSYS, /* 205 */
NOSYS, /* 206 */
NOSYS, /* 207 */
NOSYS, /* 208 */
NOSYS, /* 209 */
EMULATE(s10_lwp_mutex_timedlock, 2 | RV_DEFAULT), /* 210 */
NOSYS, /* 211 */
NOSYS, /* 212 */
#if defined(_LP64)
NOSYS, /* 213 */
#else
EMULATE(s10_getdents64, 3 | RV_DEFAULT), /* 213 */
#endif
NOSYS, /* 214 */
#if defined(_LP64)
NOSYS, /* 215 */
NOSYS, /* 216 */
NOSYS, /* 217 */
#else
EMULATE(s10_stat64, 2 | RV_DEFAULT), /* 215 */
EMULATE(s10_lstat64, 2 | RV_DEFAULT), /* 216 */
EMULATE(s10_fstat64, 2 | RV_DEFAULT), /* 217 */
#endif
NOSYS, /* 218 */
NOSYS, /* 219 */
NOSYS, /* 220 */
NOSYS, /* 221 */
NOSYS, /* 222 */
#if defined(_LP64)
NOSYS, /* 223 */
NOSYS, /* 224 */
NOSYS, /* 225 */
#else
EMULATE(s10_pwrite64, 5 | RV_DEFAULT), /* 223 */
EMULATE(s10_creat64, 2 | RV_DEFAULT), /* 224 */
EMULATE(s10_open64, 3 | RV_DEFAULT), /* 225 */
#endif
NOSYS, /* 226 */
EMULATE(s10_zone, 5 | RV_DEFAULT), /* 227 */
NOSYS, /* 228 */
NOSYS, /* 229 */
NOSYS, /* 230 */
NOSYS, /* 231 */
NOSYS, /* 232 */
NOSYS, /* 233 */
NOSYS, /* 234 */
NOSYS, /* 235 */
NOSYS, /* 236 */
NOSYS, /* 237 */
NOSYS, /* 238 */
NOSYS, /* 239 */
NOSYS, /* 240 */
NOSYS, /* 241 */
NOSYS, /* 242 */
NOSYS, /* 243 */
NOSYS, /* 244 */
NOSYS, /* 245 */
NOSYS, /* 246 */
NOSYS, /* 247 */
NOSYS, /* 248 */
NOSYS, /* 249 */
NOSYS, /* 250 */
EMULATE(s10_lwp_mutex_trylock, 1 | RV_DEFAULT), /* 251 */
NOSYS, /* 252 */
NOSYS, /* 253 */
NOSYS, /* 254 */
NOSYS /* 255 */
};