/* kcpc.c revision bb4f5042011d7f9f0d3818f8dfce534a16bd9f24 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/inttypes.h>
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
#if defined(__x86)
#endif
int kcpc_cpuctx; /* number of cpu-specific contexts */
/*
* These are set when a PCBE module is loaded.
*/
uint_t cpc_ncounters = 0;
/*
* Statistics on (mis)behavior
*/
/*
* Is misbehaviour (overflow in a thread with no context) fatal?
*/
#ifdef DEBUG
static int kcpc_nullctx_panic = 1;
#else
static int kcpc_nullctx_panic = 0;
#endif
static kcpc_ctx_t *kcpc_ctx_alloc(void);
void
{
}
int
{
int error;
ctx = kcpc_ctx_alloc();
return (EINVAL);
}
return (error);
}
/*
* We must hold cpu_lock to prevent DR, offlining, or unbinding while
* we are manipulating the cpu_t and programming the hardware, else the
* the cpu_t could go away while we're looking at it.
*/
/*
* The CPU could have been DRd out while we were getting set up.
*/
goto unbound;
/*
* If this CPU already has a bound set, return an error.
*/
goto unbound;
}
goto unbound;
}
/*
* Kernel preemption must be disabled while fiddling with the hardware
* registers to prevent partial updates.
*/
return (0);
return (EAGAIN);
}
int
{
int error;
/*
* Only one set is allowed per context, so ensure there is no
* existing context.
*/
return (EEXIST);
ctx = kcpc_ctx_alloc();
/*
* The context must begin life frozen until it has been properly
* programmed onto the hardware. This prevents the context ops from
* worrying about it until we're ready.
*/
return (EINVAL);
}
/*
* Permit threads to look at their own hardware counters from userland.
*/
/*
* Create the data store for this set.
*/
return (error);
}
/*
* Add a device context to the subject thread.
*/
/*
* Ask the backend to program the hardware.
*/
if (t == curthread) {
} else
/*
* Since we are the agent LWP, we know the victim LWP is stopped
* until we're done here; no need to worry about preemption or
* migration here. We still use an atomic op to clear the flag
* to ensure the flags are always self-consistent; they can
* still be accessed from, for instance, another CPU doing a
* kcpc_invalidate_all().
*/
return (0);
}
/*
* Walk through each request in the set and ask the PCBE to configure a
* corresponding counter.
*/
static int
{
int i;
int ret;
int n;
ASSERT(n >= 0 && n < cpc_ncounters);
== 0) {
*subcode = -1;
return (ENOTSUP);
}
/*
* If any of the counters have requested overflow
* notification, we flag the context as being one that
* cares about overflow.
*/
}
switch (ret) {
case CPC_HV_NO_ACCESS:
return (EACCES);
default:
return (EINVAL);
}
}
}
return (0);
}
static void
{
int i;
}
/*
* buf points to a user address and the data should be copied out to that
* address in the current process.
*/
int
{
return (EINVAL);
return (EAGAIN);
/*
* Kernel preemption must be disabled while reading the
* hardware regs, and if this is a CPU-bound context, while
* checking the CPU binding of the current thread.
*/
return (EAGAIN);
}
}
}
/*
* The config may have been invalidated by
* the pcbe_sample op.
*/
return (EAGAIN);
}
return (EFAULT);
return (EFAULT);
return (EFAULT);
return (0);
}
/*
* Stop the counters on the CPU this context is bound to.
*/
static void
{
== KCPC_CTX_INVALID);
pcbe_ops->pcbe_allstop();
} else
}
int
{
kthread_t *t;
return (EINVAL);
/*
* The context is thread-bound and therefore has a device
* context. It will be freed via removectx() calling
* freectx() calling kcpc_free().
*/
if (t == curthread &&
pcbe_ops->pcbe_allstop();
}
#ifdef DEBUG
panic("kcpc_unbind: context %p not preset on thread %p",
ctx, t);
#else
#endif /* DEBUG */
} else {
/*
* If we are unbinding a CPU-bound set from a remote CPU, the
* native CPU's idle thread could be in the midst of programming
* this context onto the CPU. We grab the context's lock here to
* ensure that the idle thread is done with it. When we release
* the lock, the CPU no longer has a context and the idle thread
* will move on.
*
* cpu_lock must be held to prevent the CPU from being DR'd out
* while we disassociate the context from the cpu_t.
*/
/*
* The CPU may have been DR'd out of the system.
*/
}
}
}
return (0);
}
int
{
int i;
return (EINVAL);
break;
return (0);
}
int
{
int i;
/*
* If the user is doing this on a running set, make sure the counters
* are stopped first.
*/
pcbe_ops->pcbe_allstop();
}
/*
* Ask the backend to program the hardware.
*/
return (0);
}
/*
* Caller must hold kcpc_cpuctx_lock.
*/
int
{
int i;
int flag;
int err;
/*
* This thread has a set but no context; it must be a
* CPU-bound set.
*/
return (EINVAL);
return (EAGAIN);
if (cmd == CPC_ENABLE) {
return (EINVAL);
} else if (cmd == CPC_DISABLE) {
return (EINVAL);
/*
* with current counter values, unbind, update requests with
* new config, then re-bind.
*/
pcbe_ops->pcbe_allstop();
if (enable)
else
}
if (kcpc_unbind(set) != 0)
return (EINVAL);
return (EINVAL);
}
} else
return (EINVAL);
return (0);
}
/*
* Provide PCBEs with a way of obtaining the configs of every counter which will
* be programmed together.
*
* If current is NULL, provide the first config.
*
* If data != NULL, caller wants to know where the data store associated with
* the config we return is located.
*/
void *
{
int i;
/*
* Client would like the first config, which may not be in
* counter 0; we need to search through the counters for the
* first config.
*/
for (i = 0; i < cpc_ncounters; i++)
break;
/*
* There are no counters configured for the given context.
*/
if (i == cpc_ncounters)
return (NULL);
} else {
/*
* There surely is a faster way to do this.
*/
for (i = 0; i < cpc_ncounters; i++) {
break;
}
/*
* We found the current config at picnum i. Now search for the
* next configured PIC.
*/
for (i++; i < cpc_ncounters; i++) {
break;
}
if (i == cpc_ncounters)
return (NULL);
}
}
}
/*
 * Allocate and initialize a fresh CPC context.
 *
 * NOTE(review): this listing appears truncated -- `ctx' is used below
 * without any visible declaration or allocation, and the hash-chain
 * insertion implied by `hash' is missing.  Verify against the complete
 * source before relying on this text.
 */
static kcpc_ctx_t *
kcpc_ctx_alloc(void)
{
long hash;
/* Start the virtualized tick counter from zero for a new context. */
ctx->kc_rawtick = 0;
return (ctx);
}
/*
* Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT
* in the flags.
*/
static void
{
int i, j;
int code;
return;
sizeof (kcpc_request_t), KM_SLEEP);
KM_SLEEP);
sizeof (kcpc_attr_t), KM_SLEEP);
}
}
}
}
static void
{
kcpc_ctx_t **loc;
}
/*
* Generic interrupt handler used on hardware that generates
* overflow interrupts.
*
* Note: executed at high-level interrupt context!
*/
/*ARGSUSED*/
{
int i;
/*
* On both x86 and UltraSPARC, we may deliver the high-level
* interrupt in kernel mode, just after we've started to run an
* interrupt thread. (That's because the hardware helpfully
* delivers the overflow interrupt some random number of cycles
* after the instruction that caused the overflow by which time
* we're in some part of the kernel, not necessarily running on
* the right thread).
*
* Check for this case here -- find the pinned thread
* that was running when the interrupt went off.
*/
if (t->t_flag & T_INTR_THREAD) {
/*
* Note that t_lwp is always set to point at the underlying
* thread, thus this will work in the presence of nested
* interrupts.
*/
}
} else
/*
* This can easily happen if we're using the counters in
* "shared" mode, for example, and an overflow interrupt
* occurs while we are running cpustat. In that case, the
* bound thread that has the context that belongs to this
* CPU is almost certainly sleeping (if it was running on
* the CPU we'd have found it above), and the actual
* interrupted thread has no knowledge of performance counters!
*/
/*
* Return the bound context for this CPU to
* the interrupt handler so that it can synchronously
* sample the hardware counters and restart them.
*/
return (ctx);
}
/*
* As long as the overflow interrupt really is delivered early
* enough after trapping into the kernel to avoid switching
* threads, we must always be able to find the cpc context,
* or something went terribly wrong i.e. we ended up
* running a passivated interrupt thread, a kernel
* thread or we interrupted idle, all of which are Very Bad.
*/
if (kcpc_nullctx_panic)
panic("null cpc context, thread %p", (void *)t);
/*
* Schedule an ast to sample the counters, which will
* propagate any overflow into the virtualized performance
* counter(s), and may deliver a signal.
*/
/*
* If a counter has overflowed which was counting on behalf of
* a request which specified CPC_OVF_NOTIFY_EMT, send the
* process a signal.
*/
for (i = 0; i < cpc_ncounters; i++) {
bitmap & (1 << i) &&
/*
* A signal has been requested for this PIC, so
* so freeze the context. The interrupt handler
* has already stopped the counter hardware.
*/
}
}
aston(t);
}
return (NULL);
}
/*
* The current thread context had an overflow interrupt; we're
* executing here in high-level interrupt context.
*/
/*ARGSUSED*/
{
return (DDI_INTR_UNCLAIMED);
/*
* Prevent any further interrupts.
*/
pcbe_ops->pcbe_allstop();
/*
* Invoke the "generic" handler.
*
* If the interrupt has occurred in the context of an lwp owning
* the counters, then the handler posts an AST to the lwp to
* trigger the actual sampling, and optionally deliver a signal or
* restart the counters, on the way out of the kernel using
* kcpc_hw_overflow_ast() (see below).
*
* On the other hand, if the handler returns the context to us
* directly, then it means that there are no other threads in
* the middle of updating it, no AST has been posted, and so we
* should sample the counters here, and restart them with no
* further fuss.
*/
}
return (DDI_INTR_CLAIMED);
}
/*
* Called from trap() when processing the ast posted by the high-level
* interrupt handler.
*/
int
{
int i;
int found = 0;
/*
* An overflow happened: sample the context to ensure that
* the overflow is propagated into the upper bits of the
* virtualized 64-bit counter(s).
*/
/*
* The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED
* if that pic generated an overflow and if the request it was counting
* on behalf of had CPC_OVERFLOW_REQUEST specified. We go through all
* pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we
* found any overflowed pics, keep the context frozen and return true
* (thus causing a signal to be sent).
*/
for (i = 0; i < cpc_ncounters; i++) {
found = 1;
}
}
if (found)
return (1);
/*
* Otherwise, re-enable the counters and continue life as before.
*/
return (0);
}
/*
* Called when switching away from current thread.
*/
static void
{
return;
/*
* This context has been invalidated but the counters have not
* been stopped. Stop them here and mark the context stopped.
*/
pcbe_ops->pcbe_allstop();
return;
}
pcbe_ops->pcbe_allstop();
return;
/*
* Need to sample for all reqs into each req's current mpic.
*/
}
static void
{
/*
* The context is invalidated but has not been marked stopped.
* We mark it as such here because we will not start the
* counters during this context switch.
*/
return;
/*
* While programming the hardware, the counters should be stopped. We
* don't do an explicit pcbe_allstop() here because they should have
* been stopped already by the last consumer.
*/
}
/*
* If kcpc_counts_include_idle is set to 0 by the sys admin, we add the the
* following context operators to the idle thread on each CPU. They stop the
* counters when the idle thread is switched on, and they start them again when
* it is switched off.
*/
/*ARGSUSED*/
void
{
/*
* The idle thread shouldn't be run anywhere else.
*/
/*
* We must hold the CPU's context lock to ensure the context isn't freed
* while we're looking at it.
*/
return;
}
}
void
{
/*
* The idle thread shouldn't be run anywhere else.
*/
/*
* We must hold the CPU's context lock to ensure the context isn't freed
* while we're looking at it.
*/
return;
}
pcbe_ops->pcbe_allstop();
}
/*ARGSUSED*/
static void
{
int i;
return;
return;
}
cctx = kcpc_ctx_alloc();
/*
* Copy the parent context's kc_flags field, but don't overwrite
* the child's in case it was modified during kcpc_ctx_clone.
*/
/*
* Our contract with the user requires us to immediately send an
* overflow signal to all children if we have the LWPINHERIT
* and SIGOVF flags set. In addition, all counters should be
* set to UINT64_MAX, and their pic's overflow flag turned on
* so that our trap() processing knows to send a signal.
*/
}
}
}
}
/*
* Counter Stoppage Theory
*
* The counters may need to be stopped properly at the following occasions:
*
* 1) An LWP exits.
* 2) A thread exits.
* 3) An LWP performs an exec().
* 4) A bound set is unbound.
*
* In addition to stopping the counters, the CPC context (a kcpc_ctx_t) may need
* to be freed as well.
*
* Case 1: kcpc_passivate(), called via lwp_exit(), stops the counters. Later on
* when the thread is freed, kcpc_free(), called by freectx(), frees the
* context.
*
* Case 2: same as case 1 except kcpc_passivate is called from thread_exit().
*
* Case 3: kcpc_free(), called via freectx() via exec(), recognizes that it has
* been called from exec. It stops the counters _and_ frees the context.
*
* Case 4: kcpc_unbind() stops the hardware _and_ frees the context.
*
* CPU-bound counters are always stopped via kcpc_unbind().
*/
/*
* We're being called to delete the context; we ensure that all associated data
* structures are freed, and that the hardware is passivated if this is an exec.
*/
/*ARGSUSED*/
static void
{
int i;
if (isexec) {
/*
* This thread is execing, and after the exec it should not have
* any performance counter context. Stop the counters properly
* here so the system isn't surprised by an overflow interrupt
* later.
*/
/*
* CPU-bound context; stop the appropriate CPU's ctrs.
* Hold cpu_lock while examining the CPU to ensure it
* doesn't go away.
*/
/*
* The CPU could have been DR'd out, so only stop the
* CPU and clear its context pointer if the CPU still
* exists.
*/
}
} else {
/*
* Thread-bound context; stop _this_ CPU's counters.
*/
pcbe_ops->pcbe_allstop();
}
/*
* Since we are being called from an exec and we know that
* exec is not permitted via the agent thread, we should clean
* up this thread's CPC state completely, and not leave dangling
* CPC pointers behind.
*/
}
/*
* Walk through each request in this context's set and free the PCBE's
* configuration if it exists.
*/
}
}
/*
* Free the memory associated with a request set.
*/
void
{
int i;
}
}
}
/*
* Grab every existing context and mark it as invalid.
*/
void
kcpc_invalidate_all(void)
{
/*
 * NOTE(review): per the header comment this should walk every context
 * hash chain and mark each context invalid, but the walk appears to
 * have been lost in this listing (`hash' is declared but unused and
 * there is an unbalanced closing brace).  Confirm against the full file.
 */
long hash;
}
}
/*
* Interface for PCBEs to signal that an existing configuration has suddenly
* become invalid.
*/
void
kcpc_invalidate_config(void *token)
{
/*
 * NOTE(review): body is empty in this listing; the header comment says
 * this lets a PCBE flag an existing configuration as invalid, which
 * presumably means marking the owning context invalid via `token'.
 * The implementation appears to have been truncated -- verify against
 * the complete source.
 */
}
/*
* Called from lwp_exit() and thread_exit()
*/
void
kcpc_passivate(void)
{
return;
/*
* We're cleaning up after this thread; ensure there are no dangling
* CPC pointers left behind. The context and set will be freed by
* freectx() in the case of an LWP-bound set, and by kcpc_unbind() in
* the case of a CPU-bound set.
*/
/*
* This thread has a set but no context; it must be a CPU-bound
* set. The hardware will be stopped via kcpc_unbind() when the
* process exits and closes its file descriptors with
* kcpc_close(). Our only job here is to clean up this thread's
* state; the set will be freed with the unbind().
*/
(void) kcpc_unbind(set);
/*
* Unbinding a set belonging to the current thread should clear
* its set pointer.
*/
return;
}
/*
* happen for a bit as the exit proceeds. Kernel preemption must be
* disabled here to prevent a race between checking or setting the
* INVALID_STOPPED flag here and kcpc_restore() setting the flag during
* a context switch.
*/
pcbe_ops->pcbe_allstop();
}
}
/*
* Assign the requests in the given set to the PICs in the context.
* Returns 0 if successful, -1 on failure.
*/
/*ARGSUSED*/
static int
{
int i;
int *picnum_save;
/*
* Provide kcpc_tryassign() with scratch space to avoid doing an
*/
/*
* kcpc_tryassign() blindly walks through each request in the set,
* seeing if a counter can count its event. If yes, it assigns that
* counter. However, that counter may have been the only capable counter
* for _another_ request's event. The solution is to try every possible
* request first. Note that this does not cover all solutions, as
* that would require all unique orderings of requests, an n^n operation
* which would be unacceptable for architectures with many counters.
*/
break;
return (-1);
return (0);
}
static int
{
int i;
int j;
/*
* We are attempting to assign the reqs to pics, but we may fail. If we
* fail, we need to restore the state of the requests to what it was
* when we found it, as some reqs may have been explicitly assigned to
* a specific PIC beforehand. We do this by snapshotting the assignments
* now and restoring from it later if we fail.
*
* Also we note here which counters have already been claimed by
* requests with explicit counter assignments.
*/
}
/*
* Walk through requests assigning them to the first PIC that is
* capable.
*/
i = starting_req;
do {
i = 0;
continue;
}
for (j = 0; j < cpc_ncounters; j++) {
(resmap & (1 << j)) == 0) {
/*
* We can assign this counter because:
*
* 1. It can count the event (ctrmap)
* 2. It hasn't been assigned yet (bitmap)
* 3. It wasn't reserved by a request (resmap)
*/
bitmap |= (1 << j);
break;
}
}
if (j == cpc_ncounters) {
return (-1);
}
i = 0;
} while (i != starting_req);
return (0);
}
{
int i;
int j;
KM_SLEEP);
sizeof (kcpc_attr_t), KM_SLEEP);
}
}
return (new);
}
int
kcpc_allow_nonpriv(void *token)
{
/*
 * NOTE(review): non-void function with no return statement -- using its
 * value as written is undefined behavior.  The body has almost certainly
 * been truncated in this listing (it likely tests a context flag such as
 * KCPC_CTX_NONPRIV via `token'); confirm against the complete source.
 */
}
void
{
}
/*
* Given a PCBE ID, attempt to load a matching PCBE module. The strings given
* are used to construct PCBE names, starting with the most specific,
* "pcbe.first.second.third.fourth" and ending with the least specific,
* "pcbe.first".
*
* Returns 0 if a PCBE was successfully loaded and -1 upon error.
*/
/*
 * NOTE(review): the signature line naming this function and its
 * parameters (`first', `second', `third' are used but never declared)
 * has been lost in this listing, and the modload_qualified() call is cut
 * off mid-argument.  Per the preceding comment this builds PCBE module
 * names from the id components and loads the most specific match,
 * returning 0 on success and -1 on error -- verify against the full file.
 */
int
{
uint_t s[3];
s[0] = first;
s[1] = second;
s[2] = third;
return (modload_qualified("pcbe",
}