ptrace.c revision 9acbbeaf2a1ffe5c14b244867d427714fab43c5c
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <errno.h>
#include <sys/lx_debug.h>
#include <sys/lx_syscall.h>
#include <sys/lx_signal.h>
#include <sys/lx_thread.h>
#include <unistd.h>
#include <fcntl.h>
#include <procfs.h>
#include <strings.h>
#include <signal.h>
#include <stddef.h>
#include <stdlib.h>
#include <thread.h>
#include <pthread.h>
#include <synch.h>
#include <elf.h>
#include <ieeefp.h>
#include <assert.h>
#include <libintl.h>
/*
* Linux ptrace compatibility.
*
* The brand support for ptrace(2) is built on top of the Solaris /proc
* interfaces. This gets quite complicated due to the way ptrace works and
* the Solaris realization of the Linux threading model.
*
* ptrace can only interact with a process if we are tracing it, and it is
* currently stopped. There are two ways a process can begin tracing another
* process:
*
* PTRACE_TRACEME
*
* A child process can use PTRACE_TRACEME to indicate that it wants to be
* traced by the parent. This sets the ptrace compatibility flag in /proc
* which causes the ptrace consumer to be notified through the wait(2)
* system call of events of interest. PTRACE_TRACEME is typically used by
* the debugger by forking a process, using PTRACE_TRACEME, and finally
* doing an exec of the specified program.
*
*
* PTRACE_ATTACH
*
* We can attach to a process using PTRACE_ATTACH. This is considerably
* more complicated than the previous case. On Linux, the traced process is
* effectively reparented to the ptrace consumer so that event notification
* can go through the normal wait(2) system call. Solaris has no such
* ability to reparent a process (nor should it) so some trickery was
* required.
*
* When the ptrace consumer uses PTRACE_ATTACH it forks a monitor child
* process. The monitor enables the /proc ptrace flag for itself and uses
* the native /proc mechanisms to observe the traced process and wait for
* events of interest. When the traced process stops, the monitor process
* sends itself a SIGTRAP thus rousting its parent process (the ptrace
* consumer) out of wait(2). We then translate the process id and status
* code from wait(2) to those of the traced process.
*
* To detach from the process we just have to clean up tracing flags and
* clean up the monitor.
*
* ptrace can only interact with a process if we have traced it, and it is
* currently stopped (see is_traced()). For threads, there's no way to
* distinguish whether ptrace() has been called for all threads or some
* subset. Since most clients will be tracing all threads, and erroneously
* allowing ptrace to access a non-traced thread is non-fatal (or at least
* would be fatal on linux), we ignore this aspect of the problem.
*/
/*
* Linux ptrace(2) request codes. These are the values of the request
* argument that the ptrace emulation entry point in this file switches on;
* any request not listed here is rejected with EINVAL by that switch.
*
* The numbering is not contiguous: 10, 11 and 20 through 23 have no
* definition in this list.
*/
#define LX_PTRACE_TRACEME 0
#define LX_PTRACE_PEEKTEXT 1
#define LX_PTRACE_PEEKDATA 2
#define LX_PTRACE_PEEKUSER 3
#define LX_PTRACE_POKETEXT 4
#define LX_PTRACE_POKEDATA 5
#define LX_PTRACE_POKEUSER 6
#define LX_PTRACE_CONT 7
#define LX_PTRACE_KILL 8
#define LX_PTRACE_SINGLESTEP 9
#define LX_PTRACE_GETREGS 12
#define LX_PTRACE_SETREGS 13
#define LX_PTRACE_GETFPREGS 14
#define LX_PTRACE_SETFPREGS 15
#define LX_PTRACE_ATTACH 16
#define LX_PTRACE_DETACH 17
#define LX_PTRACE_GETFPXREGS 18
#define LX_PTRACE_SETFPXREGS 19
#define LX_PTRACE_SYSCALL 24
/*
* This corresponds to the user_i387_struct Linux structure.
*/
typedef struct lx_user_fpregs {
long lxuf_cwd;
long lxuf_swd;
long lxuf_twd;
long lxuf_fip;
long lxuf_fcs;
long lxuf_foo;
long lxuf_fos;
long lxuf_st_space[20];
/*
* This corresponds to the user_fxsr_struct Linux structure.
*/
typedef struct lx_user_fpxregs {
long lxux_fip;
long lxux_fcs;
long lxux_foo;
long lxux_fos;
long lxux_mxcsr;
long lxux_reserved;
long lxux_st_space[32];
long lxux_xmm_space[32];
long lxux_padding[56];
/*
* This corresponds to the user_regs_struct Linux structure.
*/
typedef struct lx_user_regs {
long lxur_ebx;
long lxur_ecx;
long lxur_edx;
long lxur_esi;
long lxur_edi;
long lxur_ebp;
long lxur_eax;
long lxur_xds;
long lxur_xes;
long lxur_xfs;
long lxur_xgs;
long lxur_orig_eax;
long lxur_eip;
long lxur_xcs;
long lxur_eflags;
long lxur_esp;
long lxur_xss;
typedef struct lx_user {
int lxu_fpvalid;
long lxu_signal;
int lxu_reserved;
char lxu_comm[32];
int lxu_debugreg[8];
} lx_user_t;
typedef struct ptrace_monitor_map {
typedef struct ptrace_state_map {
extern void *_START_;
static sigset_t blockable_sigs;
#pragma init(ptrace_init)
/*
* Initializer for this file's signal mask. The function is named in the
* #pragma init directive immediately above, so it is intended to run as a
* load-time initializer rather than being called explicitly.
*
* Fills blockable_sigs with every signal via sigfillset(3C); the return
* value is deliberately discarded.
*/
void
ptrace_init(void)
{
(void) sigfillset(&blockable_sigs);
}
/*
* given mode.
*/
static int
{
char path[MAXPATHLEN];
}
/*
* Given a pid and lwpid, open the named file under
*/
static int
{
char path[MAXPATHLEN];
}
static int
{
int fd;
return (-ESRCH);
return (-EIO);
}
return (0);
}
static int
{
int fd;
return (-ESRCH);
return (-EIO);
}
return (0);
}
static uintptr_t
{
int afd;
/*
* Try to walk the stack looking for a return address that corresponds
* to the traced process's lx_emulate_done symbol. This relies on the
* fact that the brand library in the traced process is the same as the
* brand library in this process (indeed, this is true of all processes
* in a given branded zone).
*/
/*
* Find the base address for the brand library in the traced process
* by grabbing the AT_PHDR auxv entry, reading in the program header
* at that location and subtracting off the p_vaddr member. We use
* this to compute the location of lx_emulate_done in the traced
* process.
*/
return (0);
do {
return (0);
}
lx_debug("failed to read brand library's phdr");
return (0);
}
do {
lx_debug("ptrace read failed for stack walk");
return (0);
}
lx_debug("ptrace stack not monotonically increasing "
return (0);
}
/*
* The first argument to lx_emulate is known to be an lx_regs_t
* structure and the ABI specifies that it will be placed on the stack
* immediately preceding the return address.
*/
lx_debug("ptrace stack failed to read register set address");
return (0);
}
return (addr);
}
static int
{
return (ret);
return (-ESRCH);
/*
* If we find the syscall regs (and are therefore in an emulated
* syscall), use the register set at the given address. Otherwise, use
* the registers as reported by /proc.
*/
lx_debug("ptrace failed to read register set");
return (-EIO);
}
} else {
rp->lxur_orig_eax = 0;
/*
* If the target process has just returned from exec, it's not
* going to be sitting in the emulation function. In that case
* we need to manually fake up the values for %eax and orig_eax
* to indicate a successful return and that the traced process
* had called execve (respectively).
*/
}
}
return (0);
}
static int
{
return (ret);
return (-ESRCH);
/*
* If we find the syscall regs (and are therefore in an emulated
* syscall), modify the register set at the given address and set the
* remaining registers through the /proc interface. Otherwise just use
* the /proc interface to set register values.
*/
lx_debug("ptrace failed to write register set");
return (-EIO);
}
} else {
}
return (-ESRCH);
return (-EIO);
}
return (0);
}
static int
{
char *data;
int ret, i;
return (ret);
/*
* The Linux structure uses 10 bytes per floating-point register.
*/
for (i = 0; i < 8; i++) {
data += 10;
}
return (0);
}
static int
{
struct {
long cmd;
} ctl;
char *data;
return (ret);
/*
* The Linux structure uses 10 bytes per floating-point register.
*/
for (i = 0; i < 8; i++) {
data += 10;
}
return (-ESRCH);
return (-EIO);
}
return (0);
}
static int
{
int ret, i;
return (ret);
for (i = 0; i < 8; i++) {
}
return (0);
}
static int
{
struct {
long cmd;
} ctl;
return (ret);
for (i = 0; i < 8; i++) {
}
return (-ESRCH);
return (-EIO);
}
return (0);
}
/*
* Solaris does not allow a process to manipulate its own or some
* other process's debug registers. Linux ptrace(2) allows this
* and gdb manipulates them for its watchpoint implementation.
*
* We keep a pseudo set of debug registers for each traced process
* and map their contents into the appropriate PCWATCH /proc
* operations when they are activated by gdb.
*
* To understand how the debug registers work on x86 machines,
* see section 13.1 of the AMD x86-64 Architecture Programmer's
* Manual, Volume 2, System Programming.
*/
static uintptr_t *
{
(void) mutex_lock(&ptrace_map_mtx);
break;
}
bzero(p, sizeof (*p));
p->psm_next = ptrace_state_map;
ptrace_state_map = p;
}
(void) mutex_unlock(&ptrace_map_mtx);
}
static void
{
/* ASSERT(MUTEX_HELD(&ptrace_map_mtx)) */
free(p);
break;
}
}
}
static int
{
int lrw;
int fd;
int nwatch;
int i;
int wflags;
int error;
struct {
long req;
} ctl;
/* find all watched areas */
return (-ESRCH);
return (-ESRCH);
/* clear all watched areas */
for (i = 0; i < nwatch; i++) {
return (error);
}
}
/* establish all new watched areas */
for (i = 0; i < 4; i++) {
continue;
case 0: size = 1; break;
}
case 2: continue;
}
return (error);
}
}
return (0);
}
/*
* Returns TRUE if the process is traced, FALSE otherwise. This is only true
* if the process is currently stopped, and has been traced using PTRACE_TRACEME
* or PTRACE_ATTACH.
*/
static int
{
return (0);
return (1);
(void) mutex_lock(&ptrace_map_mtx);
if (p->pmm_target == pid) {
(void) mutex_unlock(&ptrace_map_mtx);
return (1);
}
}
(void) mutex_unlock(&ptrace_map_mtx);
return (0);
}
static int
ptrace_trace_common(int fd)
{
struct {
long cmd;
union {
long flags;
} arg;
} ctl;
return (-1);
return (-1);
size = sizeof (long) + sizeof (long);
return (-1);
return (0);
}
/*
* Notify the parent that we wish to be traced. This is the equivalent of:
*
* 1. Stop on all signals, and nothing else
* 2. Turn off inherit-on-fork flag
* 3. Set ptrace compatible flag
*
* If we are not the main thread, then the client is trying to request behavior
* by which one of its own threads is to be traced. We don't support this mode
* of operation.
*/
static int
ptrace_traceme(void)
{
int error;
long ctl[2];
if (_lwp_self() != 1) {
"thread %d calling PTRACE_TRACEME is unsupported"),
_lwp_self());
return (-ENOTSUP);
}
return (ret);
/*
* Why would a process try to do this twice? I'm not sure, but there's
* a conformance test which wants this to fail just so.
*/
return (-EPERM);
return (-errno);
error = 0;
ptrace_trace_common(fd) != 0)
return (error);
}
/*
* Read a word of data from the given address. Because this is a process-wide
* action, we don't need the lwpid.
*/
static int
{
return (-ESRCH);
return (-ESRCH);
return (-EIO);
}
return (-errno);
return (0);
}
#define LX_USER_BOUND(m) \
static int
{
int dreg;
return (-ESRCH);
/*
* The offset specified by the user is an offset into the Linux
* user structure (seriously). Rather than constructing a full
* user structure, we figure out which part of the user structure
* the offset is in, and fill in just that component.
*/
return (err);
assert(0);
return (err);
assert(0);
assert(0);
assert(0);
assert(0);
assert(0);
assert(0);
assert(0);
assert(0);
assert(0);
assert(0);
assert(0);
dreg = 6;
dreg = 7;
else
data = 0;
} else {
assert(0);
return (-ENOTSUP);
}
return (-errno);
return (0);
}
/*
* Write a word of data to the given address. Because this is a process-wide
* action, we don't need the lwpid. Returns EINVAL if the address is not
* word-aligned.
*/
static int
{
int fd;
return (-ESRCH);
if (addr & 0x3)
return (-EINVAL);
return (-ESRCH);
return (-EIO);
}
return (0);
}
static int
{
int err = 0;
int dreg;
return (-ESRCH);
if (off & 0x3)
return (-EINVAL);
return (err);
}
dreg = 6;
dreg = 7;
if (dreg == 7)
}
return (err);
}
"poke", off);
assert(0);
return (-ENOTSUP);
}
static int
{
/*
* Clear the current signal.
*/
/*
* Send a signal if one was specified.
*/
}
/*
* If run is true, set the lwp running.
*/
if (run) {
}
return (-EIO);
}
return (0);
}
static int
{
long ctl[2];
int fd;
if (fd < 0) {
lx_debug("failed to open monitor ctl %d",
errno);
return (-EIO);
}
return (-EIO);
}
return (0);
}
static int
{
return (-ESRCH);
return (-EINVAL);
return (-ESRCH);
return (ret);
}
/* kludge: use debugreg[4] to remember the single-step flag */
/*
* Check for a monitor and get it moving if we find it. If any of the
* /proc operations fail, we're kind of sunk so just return an error.
*/
(void) mutex_lock(&ptrace_map_mtx);
if (p->pmm_target == lxpid) {
if ((ret = ptrace_cont_monitor(p)) != 0)
return (ret);
break;
}
}
(void) mutex_unlock(&ptrace_map_mtx);
return (0);
}
/*
* If a monitor exists for this traced process, dispose of it.
* First turn off its ptrace flag so we won't be notified of its
* impending demise. We ignore errors for this step since they
* indicate only that the monitor has been damaged due to pilot
* error. Then kill the monitor, and wait for it. If the wait
* succeeds we can dispose of the corpse, otherwise another thread's
* wait call has collected it and we need to set a flag in the
* structure so that it can be picked up in wait.
*/
static void
{
ptrace_monitor_map_t *p, **pp;
int fd;
long ctl[2];
(void) mutex_lock(&ptrace_map_mtx);
if (p->pmm_target == lxpid) {
mpid = p->pmm_monitor;
}
free(p);
} else {
p->pmm_exiting = 1;
}
break;
}
}
(void) mutex_unlock(&ptrace_map_mtx);
}
static int
{
int ret;
return (-ESRCH);
/* kill off the monitor process, if any */
return (ret);
}
static int
{
}
static int
{
int ret;
return (-ESRCH);
return (ret);
return (-errno);
return (0);
}
static int
{
return (-ESRCH);
return (-errno);
}
static int
{
int ret;
return (-ESRCH);
return (ret);
return (-errno);
return (0);
}
static int
{
return (-ESRCH);
return (-errno);
}
static int
{
int ret;
return (-ESRCH);
return (ret);
return (-errno);
return (0);
}
static int
{
return (-ESRCH);
return (-errno);
}
static void __NORETURN
ptrace_monitor(int fd)
{
struct {
long cmd;
union {
long flags;
} arg;
} ctl;
int monfd;
int rv;
size = sizeof (long) + sizeof (long);
size = sizeof (long) + sizeof (long);
size = sizeof (long);
for (;;) {
/*
* Wait for the traced process to stop.
*/
lx_debug("monitor failed to wait for LWP to stop: %s",
}
lx_debug("monitor caught traced LWP");
/*
* Pull the ptrace trigger by sending ourself a SIGTRAP. This
* will cause this, the monitor process, to stop which will
* cause the parent's waitid(2) call to return this process
* id. In lx_wait(), we remap the monitor process's pid and
* status to those of the traced LWP. When the parent process
* uses ptrace to resume the traced LWP, it will additionally
* restart this process.
*/
lx_debug("monitor was resumed");
}
}
static int
{
int ret;
/*
* We're going to need this structure so better to fail now before it's
* too late to turn back.
*/
return (-EIO);
free(p);
return (ret);
}
/*
* If this process is already traced, bail.
*/
free(p);
return (-EPERM);
}
/*
* Turn on the appropriate tracing flags. It's exceedingly unlikely
* that this operation will fail; any failure would probably be due
* to another /proc consumer mucking around.
*/
if (ptrace_trace_common(fd) != 0) {
free(p);
return (-EIO);
}
/*
* Native ptrace automatically catches processes when they exec so we
* have to do that explicitly here.
*/
if (run) {
*ctlp++ = 0;
}
free(p);
return (-EIO);
}
/*
* Spawn the monitor process to notify this process of events of
* interest in the traced process. We block signals here both so
* we're not interrupted during this operation and so that the
* monitor process doesn't accept signals.
*/
if (child == -1) {
lx_debug("failed to fork monitor process\n");
free(p);
return (-EIO);
}
p->pmm_monitor = child;
p->pmm_target = lxpid;
p->pmm_exiting = 0;
(void) mutex_lock(&ptrace_map_mtx);
p->pmm_next = ptrace_monitor_map;
ptrace_monitor_map = p;
(void) mutex_unlock(&ptrace_map_mtx);
return (0);
}
static int
{
long ctl;
/*
* Linux doesn't let you trace process 1 -- go figure.
*/
if (lxpid == 1)
return (-EPERM);
assert(0);
}
return (ret);
}
static int
{
long ctl[2];
return (-ESRCH);
return (-EINVAL);
return (-ESRCH);
/*
* The /proc ptrace flag may not be set, but we clear it
* unconditionally since doing so doesn't hurt anything.
*/
return (-EIO);
}
/*
* Clear the brand-specific system call tracing flag to ensure that
* the target doesn't stop unexpectedly some time in the future.
*/
return (-ret);
}
/* kill off the monitor process, if any */
/*
* Turn on the run-on-last-close flag so that all tracing flags will be
* cleared when we close the control file descriptor.
*/
return (-EIO);
}
/*
* Clear the current signal (if any) and possibly send the traced
* process a new signal.
*/
return (ret);
}
static int
{
int ret;
return (-ESRCH);
return (-ret);
}
int
{
if ((p1 != LX_PTRACE_TRACEME) &&
return (-ESRCH);
switch (p1) {
case LX_PTRACE_TRACEME:
return (ptrace_traceme());
case LX_PTRACE_PEEKTEXT:
case LX_PTRACE_PEEKDATA:
case LX_PTRACE_PEEKUSER:
case LX_PTRACE_POKETEXT:
case LX_PTRACE_POKEDATA:
case LX_PTRACE_POKEUSER:
case LX_PTRACE_CONT:
case LX_PTRACE_KILL:
case LX_PTRACE_SINGLESTEP:
case LX_PTRACE_GETREGS:
case LX_PTRACE_SETREGS:
case LX_PTRACE_GETFPREGS:
case LX_PTRACE_SETFPREGS:
case LX_PTRACE_ATTACH:
case LX_PTRACE_DETACH:
case LX_PTRACE_GETFPXREGS:
case LX_PTRACE_SETFPXREGS:
case LX_PTRACE_SYSCALL:
default:
return (-EINVAL);
}
}
void
lx_ptrace_fork(void)
{
/*
* Send a special signal (that has no Linux equivalent) to indicate
* that we're in this particularly special case. The signal will be
* ignored by this process, but noticed by /proc consumers tracing
* this process.
*/
}
static void
{
long *ctlp;
/*
* If any of this fails, we're really sunk since the child
* will be stuck in the middle of lx_ptrace_fork().
* Fortunately it's practically assured to succeed unless
* something is seriously wrong on the system.
*/
lx_debug("lx_catch_fork: failed to control %d",
(int)pid);
return;
}
/*
* Turn off the /proc PR_PTRACE flag so the parent doesn't get
* spurious wake ups while we're working our dark magic. Arrange to
* catch the process when it exits from fork, and turn on the /proc
* inherit-on-fork flag so we catch the child as well. We then run
* the process, wait for it to stop on the fork1(2) call and reset
* the tracing flags to their original state.
*/
if (!monitor) {
}
*ctlp++ = 0;
if (!monitor) {
}
if (monitor) {
}
lx_debug("lx_catch_fork: failed to set %d running",
(int)pid);
return;
}
/*
* Get the status so we can find the value returned from fork1() --
* the child process's pid.
*/
lx_debug("lx_catch_fork: failed to get status for %d",
(int)pid);
return;
}
/*
* We're done with the parent -- off you go.
*/
ctl[1] = 0;
size = 2 * sizeof (long);
lx_debug("lx_catch_fork: failed to set %d running",
(int)pid);
return;
}
/*
* If fork1(2) failed, we're done.
*/
if (child < 0) {
lx_debug("lx_catch_fork: fork1 failed");
return;
}
/*
* Now we need to screw with the child process.
*/
lx_debug("lx_catch_fork: failed to control %d",
(int)child);
return;
}
lx_debug("lx_catch_fork: failed to clear trace flags for %d",
(int)child);
return;
}
/*
* Now treat the child as though we had attached to it explicitly.
*/
}
static void
{
int dr7;
int lrw;
int i;
return;
case TRAP_TRACE:
break;
case TRAP_RWATCH:
case TRAP_WWATCH:
case TRAP_XWATCH:
for (i = 0; i < 4; i++) {
continue;
case 0: size = 1; break;
}
}
/*
* Were we also attempting a single-step?
* (kludge: we use debugreg[4] for this flag.)
*/
if (debugreg[4])
break;
default:
break;
}
}
/*
* This is called from the emulation of the wait4 and waitpid system call to
* take into account the monitor processes which we spawn to observe other
* processes from ptrace_attach().
*/
int
{
ptrace_monitor_map_t *p, **pp;
int fd;
/*
* If the process observed by waitid(2) corresponds to the monitor
* process for a traced thread, we need to rewhack the siginfo_t to
* look like it came from the traced thread with the flags set
* according to the current state.
*/
(void) mutex_lock(&ptrace_map_mtx);
if (p->pmm_monitor == pid) {
goto found;
}
}
(void) mutex_unlock(&ptrace_map_mtx);
/*
* If the traced process got a SIGWAITING, we must be in the middle
* of a clone(2) with CLONE_PTRACE set.
*/
ptrace_catch_fork(pid, 0);
return (-1);
}
return (0);
/*
* If the monitor is in the exiting state, ignore the event and free
* the monitor structure if the monitor has exited. By returning -1 we
* indicate to the caller that this was a spurious return from
* waitid(2) and that it should ignore the result and try again.
*/
if (p->pmm_exiting) {
(void) mutex_unlock(&ptrace_map_mtx);
free(p);
}
return (-1);
}
lxpid = p->pmm_target;
(void) mutex_unlock(&ptrace_map_mtx);
/*
* If we can't find the traced process, kill off its monitor.
*/
return (0);
}
assert(0);
}
/*
* If the traced process isn't stopped, this is a truly spurious
* event probably caused by another /proc consumer tracing the
* monitor.
*/
(void) ptrace_cont_monitor(p);
return (-1);
}
case PR_SIGNALLED:
/*
* If the traced process got a SIGWAITING, we must be in the
* middle of a clone(2) with CLONE_PTRACE set.
*/
(void) ptrace_cont_monitor(p);
return (-1);
}
break;
case PR_REQUESTED:
/*
* Make it look like the traced process stopped on an
* event of interest.
*/
break;
case PR_JOBCONTROL:
/*
* Ignore this as it was probably caused by another /proc
* consumer tracing the monitor.
*/
(void) ptrace_cont_monitor(p);
return (-1);
case PR_SYSEXIT:
/*
* Processes traced via a monitor (rather than using the
* native Solaris ptrace support) explicitly trace returns
* from exec system calls since it's an implicit ptrace
* trace point. Accordingly we need to present a process
* in that state as though it had reached the ptrace trace
* point.
*/
break;
}
/*FALLTHROUGH*/
case PR_SYSENTRY:
case PR_FAULTED:
case PR_SUSPENDED:
default:
assert(0);
}
return (0);
}