prsubr.c revision 1959771bb9d00755c3afb4b651809b6b6f4f4cb3
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
#include <sys/sysmacros.h>
#include <sys/shm_impl.h>
#include <sys/processor.h>
#include <sys/contract_impl.h>
#include <sys/schedctl.h>
#define MAX_ITERS_SPIN 5
/*
 * Per-scan page vector used when examining an address space.
 * NOTE(review): this declaration looks truncated in this revision
 * extract; the full structure presumably carries additional per-page
 * vectors (protections, sizes) -- confirm against the complete source.
 */
typedef struct prpagev {
char *pg_incore; /* vector of incore flags */
} prpagev_t;
/*
* Choose an lwp from the complete set of lwps for the process.
* This is called for any operation applied to the process
* file descriptor that requires an lwp to operate upon.
*
* Returns a pointer to the thread for the selected LWP,
* and with the dispatcher lock held for the thread.
*
* The algorithm for choosing an lwp is critical for /proc semantics;
* don't touch this code unless you know all of the implications.
*/
{
kthread_t *t;
/*
* If the agent lwp exists, it takes precedence over all others.
*/
thread_lock(t);
return (t);
}
return (t);
do { /* for each lwp in the process */
if (VSTOPPED(t)) { /* virtually stopped */
t_req = t;
continue;
}
thread_lock(t); /* make sure thread is in good state */
switch (t->t_state) {
default:
panic("prchoose: bad thread state %d, thread 0x%p",
t->t_state, (void *)t);
/*NOTREACHED*/
case TS_SLEEP:
/* this is filthy */
t_hold = t;
} else {
t_sleep = t;
}
break;
case TS_RUN:
case TS_WAIT:
t_run = t;
break;
case TS_ONPROC:
t_onproc = t;
break;
case TS_ZOMB: /* last possible choice */
break;
case TS_STOPPED:
switch (t->t_whystop) {
case PR_SUSPENDED:
t_susp = t;
break;
case PR_JOBCONTROL:
if (t->t_proc_flag & TP_PRSTOP) {
t_jdstop = t;
} else {
t_jstop = t;
}
break;
case PR_REQUESTED:
t_dtrace = t;
t_req = t;
break;
case PR_SYSENTRY:
case PR_SYSEXIT:
case PR_SIGNALLED:
case PR_FAULTED:
/*
* Make an lwp calling exit() be the
* last lwp seen in the process.
*/
t_istop = t;
break;
case PR_CHECKPOINT: /* can't happen? */
break;
default:
panic("prchoose: bad t_whystop %d, thread 0x%p",
t->t_whystop, (void *)t);
/*NOTREACHED*/
}
break;
}
thread_unlock(t);
if (t_onproc)
t = t_onproc;
else if (t_run)
t = t_run;
else if (t_sleep)
t = t_sleep;
else if (t_jstop)
t = t_jstop;
else if (t_jdstop)
t = t_jdstop;
else if (t_istop)
t = t_istop;
else if (t_dtrace)
t = t_dtrace;
else if (t_req)
t = t_req;
else if (t_hold)
t = t_hold;
else if (t_susp)
t = t_susp;
else /* TS_ZOMB */
t = p->p_tlist;
if (t != NULL)
thread_lock(t);
return (t);
}
/*
* Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
* on the /proc file descriptor. Called from stop() when a traced
* process stops on an event of interest. Also called from exit()
* and prinvalidate() to indicate POLLHUP and POLLERR respectively.
*/
void
{
/*
* We call pollwakeup() with POLLHUP to ensure that
* the pollers are awakened even if they are polling
* for nothing (i.e., waiting for the process to exit).
* This enables the use of the PRC_POLL flag for optimization
* (we can turn off PRC_POLL only if we know no pollers remain).
*/
}
}
/* called immediately below, in prfree() */
static void
{
/*
* We can't call prnotify() here because we are holding
* pidlock. We assert that there is no need to.
*/
}
}
/*
* Called from a hook in freeproc() when a traced process is removed
* from the process table. The proc-table pointers of all associated
* /proc vnodes are cleared to indicate that the process has gone away.
*/
void
{
/*
* Block the process against /proc so it can be freed.
* It cannot be freed while locked by some controlling process.
* Lock ordering:
* pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
*/
mutex_enter(&p->p_lock);
while (p->p_proc_flag & P_PR_LOCK) {
mutex_exit(&p->p_lock);
mutex_enter(&p->p_lock);
}
prfreenotify(p->p_plist);
prfreenotify(p->p_trace);
/*
* We broadcast to wake up everyone waiting for this process.
* No one can reach this process from this point on.
*/
mutex_exit(&p->p_lock);
}
/*
* Called from a hook in exit() when a traced process is becoming a zombie.
*/
void
{
if (pr_watch_active(p)) {
}
/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
if (p->p_trace) {
}
}
/*
* Called when a thread calls lwp_exit().
*/
void
{
/*
* The process must be blocked against /proc to do this safely.
* The lwp must not disappear while the process is marked P_PR_LOCK.
* It is the caller's responsibility to have called prbarrier(p).
*/
if (pcp->prc_thread == t) {
}
}
}
if (p->p_trace)
}
/*
* Called when a zombie thread is joined or when a
* detached lwp exits. Called from lwp_hash_out().
*/
void
{
/*
* The process must be blocked against /proc to do this safely.
* The lwp must not disappear while the process is marked P_PR_LOCK.
* It is the caller's responsibility to have called prbarrier(p).
*/
while (vp) {
}
if (p->p_trace)
}
/*
* Called from a hook in exec() when a thread starts exec().
*/
void
prexecstart(void)
{
/*
 * The P_PR_EXEC flag blocks /proc operations for
 * the duration of the exec().
 * We can't start exec() while the process is
 * locked by /proc, so we call prbarrier().
 * lwp_nostop keeps the process from being stopped
 * via job control for the duration of the exec().
 */
/*
 * NOTE(review): the local declarations of 'p' and 'lwp' are elided in
 * this extract; presumably they refer to the current process and its
 * current lwp -- confirm against the complete source.
 */
prbarrier(p);
lwp->lwp_nostop++;
p->p_proc_flag |= P_PR_EXEC;
}
/*
* Called from a hook in exec() when a thread finishes exec().
* The thread may or may not have succeeded. Some other thread
* may have beat it to the punch.
*/
void
prexecend(void)
{
lwp->lwp_nostop--;
/*
* We are on our way to exiting because some
* other thread beat us in the race to exec().
* Don't clear the P_PR_EXEC flag in this case.
*/
return;
}
/*
* Wake up anyone waiting in /proc for the process to complete exec().
*/
p->p_proc_flag &= ~P_PR_EXEC;
}
}
/*
* We dealt with the process common above.
*/
}
}
}
/*
* Called from a hook in relvm() just before freeing the address space.
* We free all the watched areas now.
*/
void
prrelvm(void)
{
/*
 * Serialize with /proc before tearing down watched areas: take
 * p_lock and wait out any controlling process holding P_PR_LOCK.
 * NOTE(review): the declaration of 'p' and the body of the
 * watched-area teardown are elided in this extract -- presumably
 * 'p' is the current process and the empty if-body freed the
 * watched areas; confirm against the complete source.
 */
mutex_enter(&p->p_lock);
prbarrier(p); /* block all other /proc operations */
if (pr_watch_active(p)) {
}
mutex_exit(&p->p_lock);
}
/*
* Called from hooks in exec-related code when a traced process
* file. Rather than fail the exec we invalidate the associated
* /proc vnodes so that subsequent attempts to use them will fail.
*
* All /proc vnodes, except directory vnodes, are retained on a linked
* list (rooted at p_plist in the process structure) until last close.
*
* A controlling process must re-open the /proc files in order to
* regain control.
*/
void
{
int writers = 0;
mutex_enter(&p->p_lock);
prbarrier(p); /* block all other /proc operations */
/*
* At this moment, there can be only one lwp in the process.
*/
/*
* Invalidate any currently active /proc vnodes.
*/
case PR_PSINFO: /* these files can be read by anyone */
case PR_LPSINFO:
case PR_LWPSINFO:
case PR_LWPDIR:
case PR_LWPIDDIR:
case PR_USAGE:
case PR_LUSAGE:
case PR_LWPUSAGE:
break;
default:
break;
}
}
/*
* Wake up anyone waiting for the process or lwp.
* p->p_trace is guaranteed to be non-NULL if there
* are any open /proc files for this process.
*/
/*
* Are there any writers?
*/
/*
* Clear the exclusive open flag (old /proc interface).
* Set prc_selfopens equal to prc_writers so that
* the next O_EXCL|O_WRITE open will succeed
* even with existing (though invalid) writers.
* prclose() must decrement prc_selfopens when
* the invalid files are closed.
*/
}
}
/*
* We should not invalidate the lwpiddir vnodes,
* but the necessities of maintaining the old
* ioctl()-based version of /proc require it.
*/
}
/*
* If any tracing flags are in effect and any vnodes are open for
* writing then set the requested-stop and run-on-last-close flags.
* Otherwise, clear all tracing flags.
*/
t->t_proc_flag &= ~TP_PAUSE;
t->t_proc_flag |= TP_PRSTOP;
aston(t); /* so ISSIG will see the flag */
p->p_proc_flag |= P_PR_RUNLCL;
} else {
}
mutex_exit(&p->p_lock);
}
/*
* Acquire the controlled process's p_lock and mark it P_PR_LOCK.
* Return with pr_pidlock held in all cases.
* Return with p_lock held if the process still exists.
* Return value is the process pointer if the process still exists, else NULL.
* If we lock the process, give ourself kernel priority to avoid deadlocks;
* this is undone in prunlock().
*/
proc_t *
{
proc_t *p;
return (NULL);
mutex_enter(&p->p_lock);
while (p->p_proc_flag & P_PR_LOCK) {
/*
* the process disappears while we sleep.
*/
mutex_exit(mp);
return (NULL);
mutex_enter(&p->p_lock);
}
p->p_proc_flag |= P_PR_LOCK;
return (p);
}
/*
* Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
* This prevents any lwp of the process from disappearing and
* blocks most operations that a process can perform on itself.
* Returns 0 on success, a non-zero error number on failure.
*
* 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
* the subject process is a zombie (ZYES) or fail for zombies (ZNO).
*
* error returns:
* ENOENT: process or lwp has disappeared or process is exiting
* (or has become a zombie and zdisp == ZNO).
* EAGAIN: procfs vnode has become invalid.
* EINTR: signal arrived while waiting for exec to complete.
*/
int
{
proc_t *p;
/*
* Return ENOENT immediately if there is no process.
*/
if (p == NULL)
return (ENOENT);
/*
* Return ENOENT if process entered zombie state or is exiting
* and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
*/
return (ENOENT);
}
/*
* If lwp-specific, check to see if lwp has disappeared.
*/
return (ENOENT);
}
}
/*
* Return EAGAIN if we have encountered a security violation.
* (The process exec'd a set-id or unreadable executable file.)
*/
return (EAGAIN);
}
/*
* If process is undergoing an exec(), wait for
* completion and then start all over again.
*/
if (p->p_proc_flag & P_PR_EXEC) {
return (EINTR);
}
goto again;
}
/*
* We return holding p->p_lock.
*/
return (0);
}
/*
* Undo prlock() and pr_p_lock().
* p->p_lock is still held; pr_pidlock is no longer held.
*
* prunmark() drops the P_PR_LOCK flag and wakes up another thread,
* if any, waiting for the flag to be dropped; it retains p->p_lock.
*
* prunlock() calls prunmark() and then drops p->p_lock.
*/
void
{
p->p_proc_flag &= ~P_PR_LOCK;
}
void
{
/*
* If we (or someone) gave it a SIGKILL, and it is not
* already a zombie, set it running unconditionally.
*/
prunmark(p);
mutex_exit(&p->p_lock);
}
/*
* Called while holding p->p_lock to delay until the process is unlocked.
* We enter holding p->p_lock; p->p_lock is dropped and reacquired.
* The process cannot become locked again until p->p_lock is dropped.
*/
void
{
if (p->p_proc_flag & P_PR_LOCK) {
/* The process is locked; delay until not locked */
while (p->p_proc_flag & P_PR_LOCK)
}
}
/*
* The u-block is mapped in by this routine and unmapped at the end.
*/
void
{
kthread_t *t;
t = prchoose(p); /* returns locked thread */
thread_unlock(t);
/* just bzero the process part, prgetlwpstatus() does the rest */
/*
* Inside local zones, fake zsched's pid as parent pids for
* processes which reference processes outside of the zone.
*/
} else {
}
switch (p->p_model) {
case DATAMODEL_ILP32:
break;
case DATAMODEL_LP64:
break;
}
if (p->p_agenttp)
/* get the chosen lwp's status */
/* replicate the flags */
}
#ifdef _SYSCALL32_IMPL
void
{
int flags;
flags = 0L;
if (t->t_state == TS_STOPPED) {
flags |= PR_STOPPED;
if ((t->t_schedflag & TS_PSTART) == 0)
} else if (VSTOPPED(t)) {
}
if (lwp->lwp_asleep)
if (t == p->p_agenttp)
if (!(t->t_proc_flag & TP_TWAIT))
if (t->t_proc_flag & TP_DAEMON)
if (p->p_proc_flag & P_PR_FORK)
if (p->p_proc_flag & P_PR_RUNLCL)
if (p->p_proc_flag & P_PR_KILLCL)
if (p->p_proc_flag & P_PR_ASYNC)
if (p->p_proc_flag & P_PR_BPTADJ)
if (p->p_proc_flag & P_PR_PTRACE)
if (VSTOPPED(t)) {
} else {
}
if (t->t_whystop == PR_FAULTED) {
if (t->t_whatstop == FLTPAGE)
} else if (lwp->lwp_curinfo)
}
if (flags & PR_STOPPED)
scalehrtime(&usr);
scalehrtime(&sys);
/*
* Fetch the current instruction, if not a system process.
* We don't attempt this unless the lwp is stopped.
*/
else if (!(flags & PR_STOPPED))
else
/*
* Drop p_lock while touching the lwp's stack.
*/
mutex_exit(&p->p_lock);
int i;
}
int i;
} else
i++, auxp++) {
break;
}
}
}
}
if (prhasfp())
mutex_enter(&p->p_lock);
}
void
{
kthread_t *t;
t = prchoose(p); /* returns locked thread */
thread_unlock(t);
/* just bzero the process part, prgetlwpstatus32() does the rest */
/*
* Inside local zones, fake zsched's pid as parent pids for
* processes which reference processes outside of the zone.
*/
} else {
}
switch (p->p_model) {
case DATAMODEL_ILP32:
break;
case DATAMODEL_LP64:
break;
}
if (p->p_agenttp)
/* get the chosen lwp's status */
/* replicate the flags */
}
#endif /* _SYSCALL32_IMPL */
/*
* Return lwp status.
*/
void
{
int flags;
flags = 0L;
if (t->t_state == TS_STOPPED) {
flags |= PR_STOPPED;
if ((t->t_schedflag & TS_PSTART) == 0)
} else if (VSTOPPED(t)) {
}
if (lwp->lwp_asleep)
if (t == p->p_agenttp)
if (!(t->t_proc_flag & TP_TWAIT))
if (t->t_proc_flag & TP_DAEMON)
if (p->p_proc_flag & P_PR_FORK)
if (p->p_proc_flag & P_PR_RUNLCL)
if (p->p_proc_flag & P_PR_KILLCL)
if (p->p_proc_flag & P_PR_ASYNC)
if (p->p_proc_flag & P_PR_BPTADJ)
if (p->p_proc_flag & P_PR_PTRACE)
if (p->p_pgidp->pid_pgorphaned)
if (p->p_pidflag & CLDNOSIGCHLD)
flags |= PR_NOSIGCHLD;
if (p->p_pidflag & CLDWAITPID)
flags |= PR_WAITPID;
if (VSTOPPED(t)) {
} else {
}
if (t->t_whystop == PR_FAULTED)
else if (lwp->lwp_curinfo)
}
if (flags & PR_STOPPED)
scalehrtime(&usr);
scalehrtime(&sys);
/*
* Fetch the current instruction, if not a system process.
* We don't attempt this unless the lwp is stopped.
*/
else if (!(flags & PR_STOPPED))
else
/*
* Drop p_lock while touching the lwp's stack.
*/
mutex_exit(&p->p_lock);
int i;
}
int i;
else
i++, auxp++) {
break;
}
}
}
}
if (prhasfp())
mutex_enter(&p->p_lock);
}
/*
* Get the sigaction structure for the specified signal. The u-block
* must already have been mapped in by the caller.
*/
void
{
}
}
}
#ifdef _SYSCALL32_IMPL
void
{
}
}
}
#endif /* _SYSCALL32_IMPL */
/*
* Count the number of segments in this process's address space.
*/
int
{
int n = 0;
n++;
}
}
return (n);
}
/*
* Convert uint32_t to decimal string w/o leading zeros.
* Add trailing null characters if 'len' is greater than string length.
* Return the string length.
*/
int
{
do {
n /= 10;
} while (n);
do {
*s++ = *--cp;
while (s < end) /* optional pad */
*s++ = '\0';
return (len);
}
/*
* Convert uint64_t to decimal string w/o leading zeros.
* Return the string length.
*/
static int
{
int len;
do {
n /= 10;
} while (n);
do {
*s++ = *--cp;
return (len);
}
void
{
char *s = name;
s += strlen(s);
*s++ = '.';
}
*s++ = '.';
*s++ = '.';
*s++ = '\0';
}
struct seg *
{
if (p->p_brksize != 0)
return (seg);
return (NULL);
}
/*
* Implementation of service functions to handle procfs generic chained
* copyout buffers.
*/
typedef struct pr_iobuf_list {
} piol_t;
void
{
ASSERT(initial_size > 0);
/*
* Someone creating chained copyout buffers may ask for less than
* MAPSIZE if the amount of data to be buffered is known to be
* smaller than that.
* But in order to prevent involuntary self-denial of service,
* the requested input size is clamped at MAPSIZE.
*/
iol->piol_usedsize = 0;
}
void *
{
char *new;
/*
* Out of space in the current buffer. Allocate more.
*/
newiol->piol_usedsize = 0;
}
return (new);
}
int
{
if (!error) {
iol->piol_usedsize))
}
}
return (error);
}
int
{
char *base;
}
return (error);
}
/*
* Return an array of structures with memory map information.
* We allocate here; the caller must deallocate.
*/
int
{
/*
* Request an initial buffer size that doesn't waste memory
* if the address space has only a small number of segments.
*/
return (0);
do {
continue;
if (prot & PROT_WRITE)
if (reserved) {
PAGEOFFSET) & PAGEMASK;
(uintptr_t)prgetstackbase(p) +
}
}
/*
* Manufacture a filename for the "object" directory.
*/
else
}
/*
* Get the SysV shared memory id, if any.
*/
SHMID_NONE) {
} else {
}
}
return (0);
}
#ifdef _SYSCALL32_IMPL
int
{
/*
* Request an initial buffer size that doesn't waste memory
* if the address space has only a small number of segments.
*/
return (0);
do {
continue;
if (prot & PROT_WRITE)
if (reserved) {
PAGEOFFSET) & PAGEMASK;
(uintptr_t)prgetstackbase(p) +
}
}
/*
* Manufacture a filename for the "object" directory.
*/
else
}
/*
* Get the SysV shared memory id, if any.
*/
SHMID_NONE) {
} else {
}
}
return (0);
}
#endif /* _SYSCALL32_IMPL */
/*
* Return the size of the /proc page data file.
*/
{
return (0);
size = sizeof (prpageheader_t);
do {
}
return (size);
}
#ifdef _SYSCALL32_IMPL
{
return (0);
size = sizeof (prpageheader32_t);
do {
}
return (size);
}
#endif /* _SYSCALL32_IMPL */
/*
* Read page data information.
*/
int
{
int error;
return (0);
}
return (E2BIG);
}
do {
continue;
/*
* It's possible that the address space can change
* subtly even though we're holding as->a_lock
* due to the nondeterminism of page_exists() in
* the presence of asynchronously flushed pages or
* mapped files whose sizes are changing.
* page_exists() may be called indirectly from
* pr_getprot() by a SEGOP_INCORE() routine.
* If this happens we need to make sure we don't
* overrun the buffer whose size we computed based
* on the initial iteration through the segments.
* Once we've detected an overflow, we need to clean
* up the temporary memory allocated in pr_getprot()
* and retry. If there's a pending signal, we return
* EINTR so that this thread can be dislodged if
* a latent bug causes us to spin indefinitely.
*/
return (EINTR);
goto again;
}
if (prot & PROT_WRITE)
/*
* Manufacture a filename for the "object" directory.
*/
else
}
/*
* Get the SysV shared memory id, if any.
*/
SHMID_NONE) {
} else {
}
}
return (error);
}
#ifdef _SYSCALL32_IMPL
int
{
int error;
return (0);
}
return (E2BIG);
}
do {
continue;
/*
* It's possible that the address space can change
* subtly even though we're holding as->a_lock
* due to the nondeterminism of page_exists() in
* the presence of asynchronously flushed pages or
* mapped files whose sizes are changing.
* page_exists() may be called indirectly from
* pr_getprot() by a SEGOP_INCORE() routine.
* If this happens we need to make sure we don't
* overrun the buffer whose size we computed based
* on the initial iteration through the segments.
* Once we've detected an overflow, we need to clean
* up the temporary memory allocated in pr_getprot()
* and retry. If there's a pending signal, we return
* EINTR so that this thread can be dislodged if
* a latent bug causes us to spin indefinitely.
*/
return (EINTR);
goto again;
}
if (prot & PROT_WRITE)
/*
* Manufacture a filename for the "object" directory.
*/
else
}
/*
* Get the SysV shared memory id, if any.
*/
SHMID_NONE) {
} else {
}
}
return (error);
}
#endif /* _SYSCALL32_IMPL */
{
/*
* The value returned will be relevant in the zone of the examiner,
* which may not be the same as the zone which performed the procfs
* mount.
*/
/*
* Prorate over online cpus so we don't exceed 100%
*/
if (nonline > 1)
pct = 0x8000;
}
/*
* Return information used by ps(1).
*/
void
{
kthread_t *t;
else {
thread_unlock(t);
}
/*
* only export SSYS and SMSACCT; everything else is off-limits to
* userland apps.
*/
mutex_enter(&p->p_crlock);
mutex_exit(&p->p_crlock);
/*
* Inside local zones, fake zsched's pid as parent pids for
* processes which reference processes outside of the zone.
*/
} else {
}
switch (p->p_model) {
case DATAMODEL_ILP32:
break;
case DATAMODEL_LP64:
break;
}
if (t == NULL) {
if (wcode)
} else {
dev_t d;
d = cttydev(p);
/*
* If the controlling terminal is the real
* or workstation console device, map to what the
* user thinks is the console device. Handle case when
* rwsconsdev or rconsdev is set to NODEV for Starfire.
*/
d = uconsdev;
/* get the chosen lwp's lwpsinfo */
/* compute %cpu for the process */
if (p->p_lwpcnt == 1)
else {
t = p->p_tlist;
do {
}
} else {
mutex_exit(&p->p_lock);
mutex_enter(&p->p_lock);
}
}
}
#ifdef _SYSCALL32_IMPL
void
{
kthread_t *t;
else {
thread_unlock(t);
}
/*
* only export SSYS and SMSACCT; everything else is off-limits to
* userland apps.
*/
mutex_enter(&p->p_crlock);
mutex_exit(&p->p_crlock);
/*
* Inside local zones, fake zsched's pid as parent pids for
* processes which reference processes outside of the zone.
*/
} else {
}
switch (p->p_model) {
case DATAMODEL_ILP32:
break;
case DATAMODEL_LP64:
break;
}
if (t == NULL) {
extern int wstat(int, int); /* needs a header file */
if (wcode)
} else {
dev_t d;
d = cttydev(p);
/*
* If the controlling terminal is the real
* or workstation console device, map to what the
* user thinks is the console device. Handle case when
* rwsconsdev or rconsdev is set to NODEV for Starfire.
*/
d = uconsdev;
/* get the chosen lwp's lwpsinfo */
/* compute %cpu for the process */
if (p->p_lwpcnt == 1)
else {
t = p->p_tlist;
do {
}
} else {
mutex_exit(&p->p_lock);
mutex_enter(&p->p_lock);
}
}
/*
* If we are looking at an LP64 process, zero out
* the fields that cannot be represented in ILP32.
*/
if (p->p_model != DATAMODEL_ILP32) {
}
}
#endif /* _SYSCALL32_IMPL */
void
{
char c, state;
/* map the thread state enum into a process state enum */
switch (state) {
default: state = 0; c = '?'; break;
}
if (retval == 0) {
}
/* compute %cpu for the lwp */
}
#ifdef _SYSCALL32_IMPL
void
{
char c, state;
/* map the thread state enum into a process state enum */
switch (state) {
default: state = 0; c = '?'; break;
}
if (retval == 0) {
} else {
}
/* compute %cpu for the lwp */
}
#endif /* _SYSCALL32_IMPL */
/*
* This used to get called when microstate accounting was disabled but
* microstate information was requested. Since Microstate accounting is on
* regardless of the proc flags, this simply makes it appear to procfs that
* microstate accounting is on. This is relatively meaningless since you
* can't turn it off, but this is here for the sake of appearances.
*/
/*ARGSUSED*/
void
{
proc_t *p;
if (t == NULL)
return;
p = ttoproc(t);
/*
* A system process (p0) could be referenced if the thread is
* in the process of exiting. Don't turn on microstate accounting
* in that case.
*/
return;
/*
* Loop through all the LWPs (kernel threads) in the process.
*/
t = p->p_tlist;
do {
t->t_proc_flag |= TP_MSACCT;
}
/*
* It's not really possible to disable microstate accounting anymore.
* However, this routine simply turns off the ms accounting flags in a process
* This way procfs can still pretend to turn microstate accounting on and
* off for a process, but it actually doesn't do anything. This is
* a neutered form of preemptive idiot-proofing.
*/
void
disable_msacct(proc_t *p)
{
kthread_t *t;
/*
 * Loop through all the LWPs (kernel threads) in the process.
 * NOTE(review): the initialization of 't' (presumably from
 * p->p_tlist) and the do/while continuation condition that walks
 * the circular thread list are elided in this extract -- confirm
 * against the complete source.
 */
do {
/* clear per-thread flag */
t->t_proc_flag &= ~TP_MSACCT;
}
}
/*
* Return resource usage information.
*/
void
{
int state;
int i;
} else {
}
/*
* Adjust for time waiting in the dispatcher queue.
*/
if (waitrq != 0) {
scalehrtime(&tmp1);
}
/*
* Adjust for time spent in current microstate.
*/
}
i = 0;
do {
case LMS_SLEEP:
/*
* Update the timer for the current sleep state.
*/
case LMS_TFAULT:
case LMS_DFAULT:
case LMS_KFAULT:
case LMS_USER_LOCK:
break;
default:
break;
}
break;
case LMS_TFAULT:
case LMS_DFAULT:
case LMS_KFAULT:
case LMS_USER_LOCK:
state = LMS_SYSTEM;
break;
}
switch (state) {
default: panic("prgetusage: unknown microstate");
}
if (tmp1 < 0) {
i++;
continue;
}
scalehrtime(&tmp1);
} while (tmp1 < 0 && i < MAX_ITERS_SPIN);
/* update pup timestamp */
/*
* Resource usage counters.
*/
}
/*
* Convert ms_acct stats from unscaled high-res time to nanoseconds
*/
void
{
}
/*
* Sum resource usage information.
*/
void
{
int state;
int i;
scalehrtime(&tmp);
} else {
scalehrtime(&tmp);
}
prscaleusage(&conv);
/*
* Adjust for time waiting in the dispatcher queue.
*/
if (waitrq != 0) {
scalehrtime(&tmp);
}
/*
* Adjust for time spent in current microstate.
*/
}
i = 0;
do {
case LMS_SLEEP:
/*
* Update the timer for the current sleep state.
*/
case LMS_TFAULT:
case LMS_DFAULT:
case LMS_KFAULT:
case LMS_USER_LOCK:
break;
default:
break;
}
break;
case LMS_TFAULT:
case LMS_DFAULT:
case LMS_KFAULT:
case LMS_USER_LOCK:
state = LMS_SYSTEM;
break;
}
switch (state) {
default: panic("praddusage: unknown microstate");
}
if (tmp < 0) {
i++;
continue;
}
scalehrtime(&tmp);
} while (tmp < 0 && i < MAX_ITERS_SPIN);
/* update pup timestamp */
/*
* Resource usage counters.
*/
}
/*
* Convert a prhusage_t to a prusage_t.
* This means convert each hrtime_t to a timestruc_t
* and copy the count fields uint64_t => ulong_t.
*/
void
{
int i;
for (i = 0; i < 22; i++)
}
#ifdef _SYSCALL32_IMPL
void
{
int i;
for (i = 0; i < 22; i++)
}
#endif /* _SYSCALL32_IMPL */
/*
* Determine whether a set is empty.
*/
int
{
while (n--)
if (*sp++)
return (0);
return (1);
}
/*
* Utility routine for establishing a watched area in the process.
* Keep the list of watched areas sorted by virtual address.
*/
int
{
struct watched_area *target;
int error = 0;
/* we must not be holding p->p_lock, but the process must be locked */
/*
* If this is our first watchpoint, enable watchpoints for the process.
*/
if (!pr_watch_active(p)) {
kthread_t *t;
mutex_enter(&p->p_lock);
do {
watch_enable(t);
}
mutex_exit(&p->p_lock);
}
/*
* We discovered an existing, overlapping watched area.
* Allow it only if it is an exact match.
*/
}
} else {
}
return (error);
}
/*
* Utility routine for clearing a watched area in the process.
* Must be an exact match of the virtual address.
* size and flags don't matter.
*/
int
{
struct watched_area *found;
/* we must not be holding p->p_lock, but the process must be locked */
if (!pr_watch_active(p)) {
return (0);
}
/*
* Look for a matching address in the watched areas. If a match is
* found, clear the old watched area and adjust the watched page(s). It
* is not an error if there is no match.
*/
}
/*
* If we removed the last watched area from the process, disable
* watchpoints.
*/
if (!pr_watch_active(p)) {
kthread_t *t;
mutex_enter(&p->p_lock);
do {
watch_disable(t);
}
mutex_exit(&p->p_lock);
}
return (0);
}
/*
* Frees all the watched_area structures
*/
void
{
struct watched_area *delp;
void *cookie;
avl_destroy(&p->p_warea);
}
/*
* This one is called by the traced process to unwatch all the
* pages while deallocating the list of watched_page structs.
*/
void
{
struct watched_page *pwp;
void *cookie;
return;
retrycnt = 0;
retrycnt++;
goto retry;
}
}
}
}
}
/*
* Insert a watched area into the list of watched pages.
* If oflags is zero then we are adding a new watched area.
* Otherwise we are changing the flags of an existing watched area.
*/
static int
{
struct watched_page tpw;
/*
* We need to pre-allocate a list of structures before we grab the
* address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
* held.
*/
}
/*
* Search for an existing watched page to contain the watched area.
* If none is found, grab a new one from the available list
* and insert it in the active list, keeping the list sorted
* by user-level virtual address.
*/
else
}
return (E2BIG);
}
}
}
prot &= ~PROT_WRITE;
}
}
}
/*
* If the watched area extends into the next page then do
* it over again with the virtual address of the next page.
*/
goto again;
/*
* Free any pages we may have over-allocated
*/
}
return (0);
}
/*
* Remove a watched area from the list of watched pages.
* A watched area may extend over more than one page.
*/
static void
{
struct watched_page *pwp;
struct watched_page tpw;
else
/*
* Reset the hat layer's protections on this page.
*/
prot &=
prot &= ~PROT_WRITE;
prot &=
}
}
} else {
/*
* No watched areas remain in this page.
* Reset everything to normal.
*/
}
}
}
}
}
/*
* Return the original protections for the specified page.
*/
static void
{
struct watched_page *pwp;
struct watched_page tpw;
}
static prpagev_t *
{
/*
* Limit the size of our vectors to pagev_lim pages at a time. We need
* 4 or 5 bytes of storage per page, so this means we limit ourself
* to about a megabyte of kernel heap by default.
*/
if (check_noreserve)
else
return (pagev);
}
static void
{
}
static caddr_t
{
return (eaddr);
else
/*
* INCORE cleverly has different semantics than GETPROT:
* it returns info on pages up to but NOT including addr + len.
*/
do {
/*
* Guilty knowledge here: We know that segvn_incore
* returns more than just the low-order bit that
* indicates the page is actually in memory. If any
* bits are set, then the page has backing store.
*/
goto out;
/*
* If we examined all the pages in the vector but we're not
* at the end of the segment, take another lap.
*/
goto refill;
}
/*
* Need to take len - 1 because addr + len is the address of the
* first byte of the page just past the end of what we want.
*/
out:
return (addr);
}
static caddr_t
{
/*
* Our starting address is either the specified address, or the base
* address from the start of the pagev. If the latter is greater,
* this means a previous call to pr_pagev_fill has already scanned
* further than the end of the previous mapping.
*/
/*
* If we're dealing with noreserve pages, then advance addr to
* the address of the next page which has backing store.
*/
prot = 0;
goto out;
}
prot = 0;
goto out;
}
}
}
}
/*
* Get the protections on the page corresponding to addr.
*/
/*
* Now loop until we find a backed page with different protections
* or we reach the end of this segment.
*/
/*
* If pn has advanced to the page number following what we
* have information on, refill the page vector and reset
* addr and pn. If pr_pagev_fill does not return the
* address of the next page, we have a discontiguity and
* thus have reached the end of the current mapping.
*/
goto out;
}
/*
* The previous page's protections are in prot, and it has
* backing. If this page is MAP_NORESERVE and has no backing,
* then end this mapping and return the previous protections.
*/
break;
/*
* Otherwise end the mapping if this page's protections (nprot)
* are different than those in the previous page (prot).
*/
break;
}
out:
return (addr);
}
{
/*
* If we're interested in the reserved space, return the size of the
* segment itself. Everything else in this function is a special case
* to determine the actual underlying size of various segment types.
*/
if (reserved)
return (size);
/*
* If this is a segvn mapping of a regular file, return the smaller
* of the segment size and the remaining size of the file beyond
* the file offset corresponding to seg->s_base.
*/
fsize = 0;
else
}
return (size);
}
/*
* If this is an ISM shared segment, don't include pages that are
* beyond the real size of the spt segment that backs it.
*/
/*
* reservation of virtual address space and has no actual size.
* Such segments are backed by segdev and have type set to neither
* MAP_SHARED nor MAP_PRIVATE.
*/
(MAP_SHARED | MAP_PRIVATE)) == 0))
return (0);
/*
* If this segment doesn't match one of the special types we handle,
* just return the size of the segment itself.
*/
return (size);
}
{
int check_noreserve;
union {
struct segvn_data *svd;
struct segdev_data *sdp;
void *data;
} s;
/*
* Don't include MAP_NORESERVE pages in the address range
* unless their mappings have actually materialized.
* We cheat by knowing that segvn is the only segment
* driver that supports MAP_NORESERVE.
*/
/*
* Examine every page only as a last resort. We use guilty knowledge
* of segvn and segdev to avoid this: if there are no per-page
* protections present in the segment and we don't care about
* MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
*/
} else {
/*
* If addr is sitting at the start of the segment, then
* create a page vector to store protection and incore
* information for pages in the segment, and fill it.
* Otherwise, we expect *tmp to address the prpagev_t
* allocated by a previous call to this function.
*/
prot = 0;
goto out;
}
} else {
}
}
out:
return (prot);
}
void
pr_getprot_done(void **tmp)
{
/*
 * NOTE(review): the body is elided in this revision extract.
 * Given the prpagev_t usage elsewhere in this file, it is expected
 * to release the page vector stashed in *tmp by pr_getprot() --
 * confirm against the complete source.
 */
}
}
/*
* Return true iff the vnode is a /proc file from the object directory.
*/
int
{
}
/*
* Return true iff the vnode is a /proc file opened by the process itself.
*/
int
{
/*
* XXX: To retain binary compatibility with the old
* ioctl()-based version of /proc, we exempt self-opens
* of /proc/<pid> from being marked close-on-exec.
*/
}
static ssize_t
{
if (pagesize == -1)
break;
}
return (hatsize);
}
/*
* Return an array of structures with extended memory map information.
* We allocate here; the caller must deallocate.
*/
int
{
/*
* Request an initial buffer size that doesn't waste memory
* if the address space has only a small number of segments.
*/
return (0);
do {
char *parr;
/*
* Segment loop part one: iterate from the base of the segment
* to its end, pausing at each address boundary (baddr) between
* ranges that have different virtual memory protections.
*/
/*
* Segment loop part two: iterate from the current
* position to the end of the protection boundary,
* pausing at each address boundary (naddr) between
* ranges that have different underlying page sizes.
*/
if (prot & PROT_WRITE)
if (psz == -1) {
mp->pr_hatpagesize = 0;
} else {
}
/*
* Manufacture a filename for the "object" dir.
*/
NULL) == 0) {
"a.out");
else
}
/*
* Get the SysV shared memory id, if any.
*/
} else {
}
}
}
}
return (0);
}
/*
* Return the process's credentials. We don't need a 32-bit equivalent of
* this function because prcred_t and prcred32_t are actually the same.
*/
void
{
mutex_enter(&p->p_crlock);
mutex_exit(&p->p_crlock);
}
/*
* Compute actual size of the prpriv_t structure.
*/
int
prgetprivsize(void)
{
	/*
	 * Sizing is delegated entirely to the privilege framework;
	 * see priv_prgetprivsize() for the meaning of a NULL argument.
	 */
	return (priv_prgetprivsize(NULL));
}
/*
* Return the process's privileges. We don't need a 32-bit equivalent of
* this function because prpriv_t and prpriv32_t are actually the same.
*/
void
{
mutex_enter(&p->p_crlock);
mutex_exit(&p->p_crlock);
}
#ifdef _SYSCALL32_IMPL
/*
* Return an array of structures with HAT memory map information.
* We allocate here; the caller must deallocate.
*/
int
{
prxmap32_t *mp;
/*
* Request an initial buffer size that doesn't waste memory
* if the address space has only a small number of segments.
*/
return (0);
do {
char *parr;
/*
* Segment loop part one: iterate from the base of the segment
* to its end, pausing at each address boundary (baddr) between
* ranges that have different virtual memory protections.
*/
/*
* Segment loop part two: iterate from the current
* position to the end of the protection boundary,
* pausing at each address boundary (naddr) between
* ranges that have different underlying page sizes.
*/
if (prot & PROT_WRITE)
if (psz == -1) {
mp->pr_hatpagesize = 0;
} else {
}
/*
* Manufacture a filename for the "object" dir.
*/
NULL) == 0) {
"a.out");
else
}
/*
* Get the SysV shared memory id, if any.
*/
} else {
}
}
}
}
return (0);
}
#endif /* _SYSCALL32_IMPL */