syscall.c revision 07a48826732249fcd3aa8dd53c8389595e9f1fbc
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/sysmacros.h>
#include <sys/ucontext.h>
#include <sys/machtrap.h>
#include <sys/aio_impl.h>
#include <sys/tnf_probe.h>
#include <sys/privregs.h>
/*
* Run-time switch for system call tracing.  The only readers visible in
* this file are the `if (syscalltrace)' blocks in the pre- and post-syscall
* paths, and those are compiled only when SYSCALLTRACE is defined.
* Defaults to off (0).
*/
int syscalltrace = 0;
#ifdef SYSCALLTRACE
#endif /* SYSCALLTRACE */
#ifdef _SYSCALL32_IMPL
/*
* Select a system call entry table.  Two results are visible here:
* `sysent' and `sysent32'; this definition exists only when
* _SYSCALL32_IMPL is defined.
*
* NOTE(review): the function name, its parameter list and the condition
* guarding the first return are not present in this view.  Presumably the
* choice depends on the data model of the lwp -- confirm against the
* complete file.
*/
static struct sysent *
{
return (sysent);
return (sysent32);
}
#else
#endif
/*
* Called to restore the lwp's register window just before
* returning to user level (only if the registers have been
* fetched or modified through /proc).
*
* NOTE(review): the function name and parameter list are not present in
* this view, nor are the copy-out statements themselves; only the
* control skeleton below is visible.
*/
/*ARGSUSED1*/
void
{
/*
* If locals+ins were modified by /proc copy them out.
* Also copy to the shared window, if necessary.
*/
/* is64 selects which arm of the `if (is64)' test below is taken */
int is64;
is64 = 1;
} else {
is64 = 0;
}
if (is64)
else {
/*
* NOTE(review): how `watched' is assigned is not visible here;
* presumably it records whether a watchpoint had to be lifted
* around the copy-out -- confirm.
*/
int watched;
if (watched)
}
/* also copy to the user return window */
}
}
/*
* Get the arguments to the current system call.
* lwp->lwp_ap normally points to the out regs in the reg structure.
* If the user is going to change the out registers and might want to
* get the args (for /proc tracing), it must copy the args elsewhere
* via save_syscall_args().
*
* The argument count is reported through *nargsp (set to 0 on the final
* else path below) and `code' is the return value.
*
* NOTE(review): the return type, name and parameter list are not present
* in this view; confirm the full contract against the complete file.
*/
{
long mask;
long *ap;
int nargs;
else
/*
* All 64 bits set.  NOTE(review): the other arm and the use of
* `mask' are not visible; presumably it is applied to each argument
* and this arm keeps the full value -- confirm.
*/
mask = 0xffffffffffffffff;
/* one iteration per argument, counting nargs down to zero */
while (nargs-- > 0)
} else {
/* no arguments are reported on this path */
*nargsp = 0;
}
return (code);
}
#ifdef _SYSCALL32_IMPL
/*
* Get the arguments to the current 32-bit system call.
*
* Works through a local array of MAXSYSARGS longs and a loop over the
* *nargsp entries, then returns `code'.
*
* NOTE(review): the declarator, the call that fills `args' and the loop
* body are not present in this view; presumably each long is narrowed
* into the caller's 32-bit argument array -- confirm.
*/
{
long args[MAXSYSARGS];
/* one iteration per reported argument */
for (i = 0; i != *nargsp; i++)
return (code);
}
#endif
/*
* Save the system call arguments in a safe place.
* lwp->lwp_ap normally points to the out regs in the reg structure.
* If the user is going to change the out registers, g1, or the stack,
* and might want to get the args (for /proc tracing), it must copy
* the args elsewhere via save_syscall_args().
*
* This may be called from stop() even when we're not in a system call.
* Since there's no easy way to tell, this must be safe (not panic).
* If the copyins get data faults, return non-zero.
*
* Return values visible below: 0 when the arguments were already saved,
* are not needed, or were handled without error; -1 from inside the
* per-argument loops.
*
* NOTE(review): the function name line and most conditions are not
* present in this view.
*/
int
{
int i;
return (0); /* args already saved or not needed */
nargs = 0; /* illegal syscall */
} else {
/*
* Find out how many arguments the system
* call uses.
*
* We have the property that loaded syscalls
* never change the number of arguments they
* use after they've been loaded once. This
* allows us to stop for /proc tracing without
* holding the module lock.
* /proc is assured that sy_narg is valid.
*/
}
}
/*
* Fetch the system call arguments.
*/
if (nargs == 0)
goto out;
/*
* Three parallel loops follow.  Each starts at index 5 or 6 and steps
* `ua' forward by the size of one element (uint32_t in the first two,
* unsigned long in the third).
* NOTE(review): the selecting conditions and the fetch that precedes
* each `return (-1)' are not visible; presumably the lower-numbered
* arguments come from registers and the rest are copied in from the
* user stack, with -1 returned on a fault -- confirm.
*/
if (nargs > 5) {
for (i = 5; i < nargs; i++) {
uint32_t a;
return (-1);
ua += sizeof (a);
}
}
} else {
if (nargs > 6) {
for (i = 6; i < nargs; i++) {
uint32_t a;
return (-1);
ua += sizeof (a);
}
}
}
} else {
if (nargs > 6) {
for (i = 6; i < nargs; i++) {
unsigned long a;
return (-1);
ua += sizeof (a);
}
}
}
out:
return (0);
}
/*
* Clear the lwp's lwp_argsaved flag so that a previously saved copy of
* the system call arguments is no longer treated as the valid one.
*
* NOTE(review): the declaration of `lwp' is not present in this view;
* presumably it is the current thread's lwp -- confirm.
*/
void
reset_syscall_args(void)
{
lwp->lwp_argsaved = 0;
}
/*
* Nonexistent system call -- signal the lwp (it may want to handle it).
* Flag an error if the lwp won't see the signal immediately.
* This works for the old or the new calling sequence.
*
* The result is used as a return value elsewhere in this file
* (`return (nosys());').
*
* NOTE(review): the return-type line and the body are not present in
* this view.
*/
nosys()
{
}
/*
* Perform pre-system-call processing, including stopping for tracing,
* auditing, microstate-accounting, etc.
*
* This routine is called only if the t_pre_sys flag is set. Any condition
* requiring pre-syscall handling must set the t_pre_sys flag. If the
* condition is persistent, this routine will repost t_pre_sys.
*
* Returns 0 when the system call should proceed.  Returns 1 when
* lwp_sysabort was set, in which case the call is not performed (see the
* comment at that return).  The audit block can also return its own
* `error' value.
*
* NOTE(review): arg0 is not referenced in the lines visible here, and
* many declarations and conditions are missing from this view.
*/
int
pre_syscall(int arg0)
{
unsigned int code;
/* set whenever a still-active condition needs t_pre_sys re-armed at exit */
int repost;
/*
* The syscall arguments in the out registers should be pointed to
* by lwp_ap. If the args need to be copied so that the outs can
* be changed without losing the ability to get the args for /proc,
* they can be saved by save_syscall_args(), and lwp_ap will be
* restored by post_syscall().
*/
/*
* Make sure the thread is holding the latest credentials for the
* process. The credentials in the process right now apply to this
* thread for the entire system call.
*/
/*
* DTrace accesses t_cred in probe context. t_cred must
* always be either NULL, or point to a valid, allocated cred
* structure.
*/
}
/*
* Undo special arrangements to single-step the lwp
* so that a debugger will see valid register contents.
* Also so that the pc is valid for syncfpu().
* Also so that a syscall like exec() can be stepped.
*/
(void) prundostep();
repost = 1;
}
/*
* Check for indirect system call in case we stop for tracing.
* Don't allow multiple indirection.
*/
}
/*
* From the proc(4) manual page:
* When entry to a system call is being traced, the traced process
* stops after having begun the call to the system but before the
* system call arguments have been fetched from the process.
* If proc changes the args we must refetch them after starting.
*/
/*
* Recheck stop condition, now that lock is held.
*/
mutex_enter(&p->p_lock);
/*
* Must refetch args since they were
* possibly modified by /proc. Indicate
* that the valid copy is in the
* registers.
*/
lwp->lwp_argsaved = 0;
}
mutex_exit(&p->p_lock);
}
repost = 1;
}
if (lwp->lwp_sysabort) {
/*
* lwp_sysabort may have been set via /proc while the process
* was stopped on PR_SYSENTRY. If so, abort the system call.
* Override any error from the copyin() of the arguments.
*/
/* one-shot request: clear it before bailing out */
lwp->lwp_sysabort = 0;
return (1); /* don't do system call, return EINTR */
}
if (audit_active) { /* begin auditing for this syscall */
int error;
return (error);
}
repost = 1;
}
#ifndef NPROBE
/* Kernel probe */
if (tnf_tracing_active) {
repost = 1;
}
#endif /* NPROBE */
#ifdef SYSCALLTRACE
/* debug-only trace output, gated by the syscalltrace global */
if (syscalltrace) {
int i;
long *ap;
char *cp;
char *sysname;
else
(void) save_syscall_args();
else {
}
/* `cp' is the separator: "(" first, ", " thereafter */
cp = "(";
cp = ", ";
}
if (i)
printf(")");
}
#endif /* SYSCALLTRACE */
/*
* If there was a continuing reason for pre-syscall processing,
* set the t_pre_sys flag for the next system call.
*/
if (repost)
t->t_pre_sys = 1;
return (0);
}
/*
* Post-syscall processing. Perform abnormal system call completion
* actions such as /proc tracing, profiling, signals, preemption, etc.
*
* This routine is called only if t_post_sys, t_sig_check, or t_astflag is set.
* Any condition requiring post-syscall handling must set one of these.
* If the condition is persistent, this routine will repost t_post_sys.
*
* NOTE(review): the function name line, its parameters and many
* declarations and conditions are not present in this view.
*/
void
{
/* set whenever a still-active condition needs t_post_sys re-armed at exit */
int repost = 0;
int proc_stop = 0; /* non-zero if stopping for /proc */
int sigprof = 0; /* non-zero if sending SIGPROF */
/* clear the request now; it is set again at the end if repost is non-zero */
t->t_post_sys = 0;
/*
* Code can be zero if this is a new LWP returning after a forkall(),
* other than the one which matches the one in the parent which called
* forkall(). In these LWPs, skip most of post-syscall activity.
*/
if (code == 0)
goto sig_check;
if (audit_active) { /* put out audit record for this syscall */
/* XX64 -- truncation of 64-bit return values? */
repost = 1;
}
uprintf("%s", m);
}
/*
* If we're going to stop for /proc tracing, set the flag and
* save the arguments so that the return values don't smash them.
*/
proc_stop = 1;
(void) save_syscall_args();
}
repost = 1;
}
/*
* Similarly check to see if SIGPROF might be sent.
*/
(void) save_syscall_args();
sigprof = 1;
}
if (error == 0) {
#ifdef SYSCALLTRACE
if (syscalltrace) {
"%d: r_val1=0x%lx, r_val2=0x%lx, id 0x%p\n",
}
#endif /* SYSCALLTRACE */
} else {
int sig;
#ifdef SYSCALLTRACE
if (syscalltrace) {
printf("%d: error=%d, id 0x%p\n",
}
#endif /* SYSCALLTRACE */
}
/*
* The default action is to redo the trap instruction.
* We increment the pc and npc past it for NORMALRETURN.
* JUSTRETURN has set up a new pc and npc already.
* If we are a cloned thread of forkall(), don't
* adjust here because we have already inherited
* the adjusted values from our clone.
*/
}
}
/*
* From the proc(4) manual page:
* When exit from a system call is being traced, the traced process
* stops on completion of the system call just prior to checking for
* signals and returning to user level. At this point all return
* values have been stored into the traced process's saved registers.
*/
if (proc_stop) {
mutex_enter(&p->p_lock);
mutex_exit(&p->p_lock);
}
/*
* If we are the parent returning from a successful
* vfork, wait for the child to exec or exit.
* This code must be here and not in the bowels of the system
* so that /proc can intercept exit from vfork in a timely way.
*/
if (t->t_flag & T_VFPARENT) {
/* one-shot flag: clear it once seen */
t->t_flag &= ~T_VFPARENT;
}
/*
* If profiling is active, bill the current PC in user-land
* and keep reposting until profiling is disabled.
*/
if (lwp->lwp_oweupc)
repost = 1;
}
/*
* Reset flag for next time.
* We must do this after stopping on PR_SYSEXIT
* because /proc uses the information in lwp_eosys.
*/
/* bitwise OR: both flags are read; non-zero if either one is set */
if (t->t_astflag | t->t_sig_check) {
/*
* Turn off the AST flag before checking all the conditions that
* may have caused an AST. This flag is on whenever a signal or
* unusual condition should be handled after the next trap or
* syscall.
*/
astoff(t);
t->t_sig_check = 0;
/*
* The following check is legal for the following reasons:
* 1) The thread we are checking, is ourselves, so there is
* no way the proc can go away.
* 2) The only time we need to be protected by the
* lock is if the binding is changed.
*
* Note we will still take the lock and check the binding
* if the condition was true without the lock held. This
* prevents lock contention among threads owned by the
* same proc.
*/
mutex_enter(&p->p_lock);
}
mutex_exit(&p->p_lock);
}
/*
* for kaio requests on the special kaio poll queue,
* copyout their results to user memory.
*/
if (p->p_aio)
aio_cleanup(0);
/*
* If this LWP was asked to hold, call holdlwp(), which will
* stop. holdlwps() sets this up and calls pokelwps() which
* sets the AST flag.
*
* Also check TP_EXITLWP, since this is used by fresh new LWPs
* through lwp_rtt(). That flag is set if the lwp_create(2)
* syscall failed after creating the LWP.
*/
holdlwp();
/*
* All code that sets signals and makes ISSIG_PENDING
* evaluate true must set t_sig_check afterwards.
*/
if (ISSIG_PENDING(t, lwp, p)) {
psig();
}
if (sigprof) {
}
/*
* If a performance counter overflow interrupt was
* delivered *during* the syscall, then re-enable the
* AST so that we take a trip through trap() to cause
* the SIGEMT to be delivered.
*/
aston(t);
/*
* If an asynchronous hardware error is pending, turn AST flag
* back on. AST will be checked again before we return to user
* mode and we'll come back through trap() to handle the error.
*/
aston(t);
}
/*
* Restore register window if a debugger modified it.
* Set up to perform a single-step if a debugger requested it.
*/
#ifndef NPROBE
/* Kernel probe */
if (tnf_tracing_active) {
repost = 1;
}
#endif /* NPROBE */
/*
* Set state to LWP_USER here so preempt won't give us a kernel
* priority if it occurs after this point. Call CL_TRAPRET() to
* restore the user-level priority.
*
* It is important that no locks (other than spinlocks) be entered
* after this point before returning to user mode (unless lwp_state
* is set back to LWP_SYS).
*
* Sampled times past this point are charged to the user.
*/
if (t->t_trapret) {
/* consume the request, then call CL_TRAPRET() under the thread lock */
t->t_trapret = 0;
thread_lock(t);
CL_TRAPRET(t);
thread_unlock(t);
}
preempt();
prunstop();
/*
* t_post_sys will be set if pcb_step is active.
*/
prdostep();
repost = 1;
}
t->t_sysnum = 0; /* no longer in a system call */
/*
* In case the args were copied to the lwp, reset the
* pointer so the next syscall will have the right lwp_ap pointer.
*/
lwp->lwp_argsaved = 0;
/*
* If there was a continuing reason for post-syscall processing,
* set the t_post_sys flag for the next system call.
*/
if (repost)
t->t_post_sys = 1;
/*
* If there is a ustack registered for this lwp, and the stack rlimit
* has been altered, read in the ustack. If the saved stack rlimit
* matches the bounds of the ustack, update the ustack to reflect
* the new rlimit. If the new stack rlimit is RLIM_INFINITY, disable
* stack checking by setting the size to 0.
*/
/* p_usrstack is read while holding p_lock */
mutex_enter(&p->p_lock);
top = p->p_usrstack;
mutex_exit(&p->p_lock);
new_size = 0;
/* native data model works on stack_t; the other arm on stack32_t */
if (model == DATAMODEL_NATIVE) {
sizeof (stack_t)) == 0 &&
sizeof (stack_t));
}
} else {
sizeof (stack32_t)) == 0 &&
sizeof (stack32_t));
}
}
/* the saved old stack limit has been handled; clear it */
lwp->lwp_old_stk_ctl = 0;
}
}
/*
* Call a system call which takes a pointer to the user args struct and
* a pointer to the return values. This is a bit slower than the standard
* C arg-passing method in some cases.
*
* NOTE(review): the declarator, local declarations and the statements
* around the `if (error)' test are not present in this view.
*/
{
/*
* If the arguments don't fit in registers %o0 - o5, make sure they
* have been copied to the lwp_arg array.
*/
if (error)
}
/*
* Load system call module.
* Returns with pointer to held read lock for module.
*
* Both visible return statements hand back `module_lock'.
*
* NOTE(review): the function name, its parameters, the declarations of
* `callp', `modp' and `module_lock', and the conditions inside the retry
* loop are not present in this view.
*/
static krwlock_t *
{
int id;
/*
* Optimization to only call modload if we don't have a loaded
* syscall.
*/
if (LOADED_SYSCALL(callp))
return (module_lock);
/* retry loop: `continue' takes another lap, `break' leaves it */
for (;;) {
break;
/*
* If we loaded successfully at least once, the modctl
* will still be valid, so we try to grab it by filename.
* If this call fails, it's because the mod_filename
* was changed after the call to modload() (mod_hold_by_name()
* is the likely culprit). We can safely just take
* another lap if this is the case; the modload() will
* change the mod_filename back to one by which we can
* find the modctl.
*/
continue;
/* module found but not installed: go round again */
if (!modp->mod_installed) {
continue;
}
break;
}
/*
* NOTE(review): the statement guarded by this test is not visible;
* presumably -1 marks a failed load and the success path takes
* the lock before returning -- confirm.
*/
if (id != -1)
return (module_lock);
}
/*
* Loadable syscall support.
* If needed, load the module, then reserve it by holding a read
* lock for the duration of the call.
* Later, if the syscall is not unloadable, it could patch the vector.
*
* Returns `rval'; on the visible else arm that is the result of
* syscall_ap().
*
* NOTE(review): the declarator, the other arm of the dispatch and the
* lock acquire/release calls are not present in this view.
*/
/*ARGSUSED*/
{
int code;
/*
* Try to autoload the system call if necessary.
*/
THREAD_KPRI_RELEASE(); /* drop priority given by rw_enter */
/*
* we've locked either the loaded syscall or nosys
*/
} else {
rval = syscall_ap();
}
THREAD_KPRI_REQUEST(); /* regain priority from read lock */
return (rval);
}
/*
* Handle indirect system calls.
* This interface should be deprecated. The library can handle
* this more efficiently, but keep this implementation for old binaries.
*
* XX64 Needs some work.
*
* One visible path rejects the request by returning nosys().
*
* NOTE(review): the declarator, the condition guarding the nosys()
* return and the argument-shifting statements are not present in this
* view.
*/
{
return (nosys());
/*
* Handle argument setup, unless already done in pre_syscall().
*/
if (save_syscall_args()) /* move args to LWP array */
} else if (!lwp->lwp_argsaved) {
/* arguments were not saved to the lwp; `ap' is used on this arm */
long *ap;
}
}
/*
* set_errno - set an error return from the current system call.
* This could be a macro.
* This returns the value it is passed, so that the caller can
* use tail-recursion-elimination and do return (set_errno(ERRNO));
*
* NOTE(review): the declarator and the body are not present in this
* view.
*/
{
}
/*
* set_proc_pre_sys - Set pre-syscall processing for entire process.
*
* The visible loop body sets t_pre_sys on thread `t'.
*
* NOTE(review): the function name line, the initialization of `t' and
* the loop's `while' condition are not present in this view; presumably
* the loop visits every thread of the process -- confirm.
*/
void
{
kthread_t *t;
do {
t->t_pre_sys = 1;
}
/*
* set_proc_post_sys - Set post-syscall processing for entire process.
*
* The visible loop body sets t_post_sys on thread `t'.
*
* NOTE(review): the function name line, the initialization of `t' and
* the loop's `while' condition are not present in this view; presumably
* the loop visits every thread of the process -- confirm.
*/
void
{
kthread_t *t;
do {
t->t_post_sys = 1;
}
/*
* set_proc_sys - Set pre- and post-syscall processing for entire process.
*
* The visible loop body sets both t_pre_sys and t_post_sys on thread `t'.
*
* NOTE(review): the initialization of `t' and the loop's `while'
* condition are not present in this view; presumably the loop visits
* every thread of process `p' -- confirm.
*/
void
set_proc_sys(proc_t *p)
{
kthread_t *t;
do {
t->t_pre_sys = 1;
t->t_post_sys = 1;
}
/*
* set_all_proc_sys - set pre- and post-syscall processing flags for all
* user processes.
*
* This is needed when auditing, tracing, or other facilities which affect
* all processes are turned on.
*
* The visible loop body sets both t_pre_sys and t_post_sys on thread `t'.
*
* NOTE(review): the function name line, the initialization of `t' and
* the loop's `while' condition are not present in this view.
*/
void
{
kthread_t *t;
do {
t->t_pre_sys = 1;
t->t_post_sys = 1;
}
/*
* set_proc_ast - Set asynchronous service trap (AST) flag for all
* threads in process.
*
* The visible loop body calls aston() on thread `t'.
*
* NOTE(review): the initialization of `t' and the loop's `while'
* condition are not present in this view; presumably the loop visits
* every thread of process `p' -- confirm.
*/
void
set_proc_ast(proc_t *p)
{
kthread_t *t;
do {
aston(t);
}