/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* CPU Performance Counter system calls and device driver.
*
* This module uses a combination of thread context operators, and
* thread-specific data to export CPU performance counters
* via both a system call and a driver interface.
*
* There are three access methods exported - the 'shared' device
* and the 'private' and 'agent' variants of the system call.
*
* The shared device treats the performance counter registers as
* a processor metric, regardless of the work scheduled on them.
* The private system call treats the performance counter registers
* as a property of a single lwp. This is achieved by using the
* thread context operators to virtualize the contents of the
* performance counter registers between lwps.
*
* The agent method is like the private method, except that it must
* be accessed via /proc's agent lwp to allow the counter context of
* other threads to be examined safely.
*
* The shared usage fundamentally conflicts with the agent and private usage;
* almost all of the complexity of the module is needed to allow these two
* models to co-exist in a reasonable way.
*/
#include <sys/processor.h>
#include <sys/machsystm.h>
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
/*
* Generic attributes supported regardless of processor.
*/
/*
* System call to access CPU performance counters.
*/
static int
/*
* NOTE(review): the declarator line (function name and parameter list) is
* not present between the return type and the opening brace in this view.
* The body refers to cmd, udata1 and udata3, which are presumably
* parameters - confirm against the full file.
*
* The visible body selects a target thread t and then dispatches on cmd.
* Every case whose tail is visible ends in "return (0)", except CPC_NPIC,
* which returns cpc_ncounters. Several statements are truncated in this
* view; the notes below mark the ones that matter when reading.
*/
{
kthread_t *t;
int error;
int size;
const char *str;
int code;
/*
* This CPC syscall should only be loaded if it found a PCBE to use.
*/
/*
* Only if /proc is invoking this system call from
* the agent thread do we allow the caller to examine
* the contexts of other lwps in the process. And
* because we know we're the agent, we know we don't
* have to grab p_lock because no-one else can change
* the state of the process.
*/
} else
/* Otherwise the request operates on the calling thread. */
t = curthread;
switch (cmd) {
case CPC_BIND:
/*
* udata1 = pointer to packed nvlist buffer
* udata2 = size of packed nvlist buffer
* udata3 = User addr to return error subcode in.
*/
/*
* Pre-bind checks. kcpc_cpuctx appears to be the open count of the
* shared device (it is incremented and decremented by the open and
* close routines later in this file); dtrace_cpc_in_use is tested
* alongside it.
* NOTE(review): the bodies of this check and of the
* kcpc_hw_lwp_hook() check are not visible here.
*/
if (kcpc_cpuctx || dtrace_cpc_in_use) {
}
if (kcpc_hw_lwp_hook() != 0) {
}
/*
* An LWP may only have one set bound to it at a time; if there
* is a set bound to this LWP already, we unbind it here.
*/
(void) kcpc_unbind(t->t_cpc_set);
}
kcpc_free_set(t->t_cpc_set);
}
kcpc_free_set(t->t_cpc_set);
/*
* EINVAL and EACCES are the only errors with more
* specific subcodes.
*/
}
return (0);
case CPC_SAMPLE:
/*
* udata1 = pointer to user's buffer
* udata2 = pointer to user's hrtime
* udata3 = pointer to user's tick
*/
/*
* We only allow thread-bound sets to be sampled via the
* syscall, so if this set has a CPU-bound context, return an
* error.
*/
udata3)) != 0)
return (0);
case CPC_PRESET:
case CPC_RESTART:
/*
* These are valid only if this lwp has a bound set.
*/
if (cmd == CPC_PRESET) {
/*
* The preset is shipped up to us from userland in two
* parts. This lets us handle 64-bit values from 32-bit
* and 64-bit applications in the same manner.
*
* udata1 = index of request to preset
* udata2 = new 64-bit preset (most sig. 32 bits)
* udata3 = new 64-bit preset (least sig. 32 bits)
*/
} else {
/*
* udata[1-3] = unused
*/
}
return (0);
case CPC_ENABLE:
case CPC_DISABLE:
/*
* Both commands clear udata1 and then share the handling of the
* two cases below.
*/
udata1 = 0;
/*FALLTHROUGH*/
case CPC_USR_EVENTS:
case CPC_SYS_EVENTS:
/*
* Provided for backwards compatibility with CPCv1.
*
* Stop the counters and record the current counts. Use the
* counts as the preset to rebind a new set with the requests
* reconfigured as requested.
*
* udata1: 1 == enable; 0 == disable
* udata{2,3}: unused
*/
if ((error = kcpc_enable(t,
}
return (0);
case CPC_NPIC:
/* The value of cpc_ncounters is returned directly. */
return (cpc_ncounters);
case CPC_CAPS:
case CPC_EVLIST_SIZE:
case CPC_LIST_EVENTS:
/*
* udata1 = pointer to user's int or buffer
* udata2 = picnum
* udata3 = unused
*/
if (cmd == CPC_EVLIST_SIZE) {
} else {
if (copyout(
}
return (0);
case CPC_ATTRLIST_SIZE:
case CPC_LIST_ATTRS:
/*
* udata1 = pointer to user's int or buffer
* udata2 = unused
* udata3 = unused
*
* attrlist size is length of PCBE-supported attributes, plus
* room for "picnum\0" plus an optional ',' separator char.
*/
if (str[0] != '\0')
/*
* A ',' separator character is necessary.
*/
size += 1;
if (cmd == CPC_ATTRLIST_SIZE) {
} else {
/*
* Copyout the PCBE attributes, and then append the
* generic attribute list (with separator if necessary).
*/
if (str[0] != '\0') {
== -1)
} else
}
return (0);
case CPC_IMPL_NAME:
case CPC_CPUREF:
/*
* udata1 = pointer to user's buffer
* udata2 = unused
* udata3 = unused
*/
if (cmd == CPC_IMPL_NAME) {
} else {
}
return (0);
case CPC_INVALIDATE:
/* Invalidate the target thread's counter state via kcpc_invalidate(). */
kcpc_invalidate(t);
return (0);
case CPC_RELE:
return (0);
default:
/* NOTE(review): the handling of unrecognised commands is not visible. */
}
}
/*
* The 'shared' device allows direct access to the
* performance counter control register of the current CPU.
* The major difference between the contexts created here and those
* above is that the context handlers are -not- installed, thus
* no context switching behaviour occurs.
*
* Because they manipulate per-cpu state, these ioctls can
* only be invoked from a bound lwp, by a caller with the cpc_cpu privilege
* who can open the relevant entry in /devices (the act of holding it open
* causes other uses of the counters to be suspended).
*
* Note that for correct results, the caller -must- ensure that
* all existing per-lwp contexts are either inactive or marked invalid;
* that's what the open routine does.
*/
/*ARGSUSED*/
static int
/*
* NOTE(review): the declarator line (function name and parameter list) is
* not present in this view, and a number of conditions guarding the
* return statements below are missing. Read the error returns as the
* tails of checks whose tests cannot be seen here.
*
* The visible body dispatches on cmd over CPCIO_BIND, CPCIO_SAMPLE and
* CPCIO_RELE; any other command yields EINVAL.
*/
{
int error;
int code;
return (EAGAIN); /* someone unbound it? */
return (EFAULT);
}
switch (cmd) {
case CPCIO_BIND:
/*
* udata1 = pointer to packed nvlist buffer
* udata2 = size of packed nvlist buffer
* udata3 = User addr to return error subcode in.
*/
(void) kcpc_unbind(t->t_cpc_set);
}
return (error);
}
kcpc_free_set(t->t_cpc_set);
return (EFAULT);
return (EINVAL);
}
kcpc_free_set(t->t_cpc_set);
/*
* Subcodes are only returned for EINVAL and EACCES.
*/
return (EFAULT);
return (error);
}
return (0);
case CPCIO_SAMPLE:
/*
* udata1 = pointer to user's buffer
* udata2 = pointer to user's hrtime
* udata3 = pointer to user's tick
*/
/*
* Only CPU-bound sets may be sampled via the ioctl(). If this
* set has no CPU-bound context, return an error.
*/
return (EINVAL);
udata3)) != 0)
return (error);
return (0);
case CPCIO_RELE:
return (EINVAL);
/* The result of kcpc_unbind() is passed straight back to the caller. */
return (kcpc_unbind(t->t_cpc_set));
default:
return (EINVAL);
}
}
/*
* The device supports multiple opens, but only one open
* is allowed per processor. This is to enable multiple
* instances of tools looking at different processors.
*/
/*ARGSUSED1*/
static int
/*
* NOTE(review): the declarator line (function name and parameter list) and
* the conditions guarding the early returns are not present in this view.
*
* Visible behaviour: kcpc_cpuctx is incremented on entry to the main check.
* On the transition to 1 (the first open) the routine refuses with EAGAIN
* if dtrace_cpc_in_use is set. Every visible failure path inside that
* branch decrements kcpc_cpuctx again before returning. Returns 0 on
* success.
*/
{
int error;
return (error);
return (ENXIO);
return (EINVAL);
return (EINVAL);
if (++kcpc_cpuctx == 1) {
/*
* Bail out if DTrace is already using the counters.
*/
if (dtrace_cpc_in_use) {
kcpc_cpuctx--;
return (EAGAIN);
}
KM_SLEEP);
/*
* When this device is open for processor-based contexts,
* no further lwp-based contexts can be created.
*
* Since this is the first open, ensure that all existing
* contexts are invalidated.
*/
kcpc_cpuctx--;
return (EAGAIN);
kcpc_cpuctx--;
return (EACCES);
}
return (0);
}
/*ARGSUSED1*/
static int
/*
* NOTE(review): the declarator line (function name and parameter list) is
* not present in this view.
*
* Drops one count from kcpc_cpuctx. When the count reaches zero,
* kcpc_cpumap is reset to NULL; any release of the storage it pointed at
* is not visible here. Always returns 0.
*/
{
if (--kcpc_cpuctx == 0) {
kcpc_cpumap = NULL;
}
/* The count must never go negative; that would mean an unmatched close. */
ASSERT(kcpc_cpuctx >= 0);
return (0);
}
/*
* Sane boundaries on the size of packed lists. In bytes.
*/
/*
* Sane boundary on the number of requests a set can contain.
*/
/*
* Sane boundary on the number of attributes a request can contain.
*/
/*
* Copy in a packed nvlist from the user and create a request set out of it.
* On success, returns 0 and stores a pointer to the newly created set.
* On failure, returns an error code.
*/
int
/*
* NOTE(review): the declarator line (function name and parameter list),
* several local declarations, the loop headers and most call expressions
* are not present in this view. The "inval" label targeted by the gotos
* below is also not visible; presumably it precedes the final
* "return (EINVAL)" - confirm against the full file.
*
* Visible outline:
* 1. Early EINVAL/EFAULT returns while obtaining the packed buffer.
* 2. A walk of the top-level nvlist that accepts only UINT32 and
*    NVLIST_ARRAY members, counting them in i; exactly two are required.
* 3. A bound check of nreqs against CPC_MAX_NREQS.
* 4. A per-request loop that switches on each nvpair's type and bails
*    out to "inval" on any failure.
*/
{
int i;
int j;
char *packbuf;
char *string;
char *name;
return (EINVAL);
return (EFAULT);
}
return (EINVAL);
}
/*
* The nvlist has been unpacked so there is no need for the packed
* representation from this point on.
*/
/* i counts the members seen in the top-level nvlist. */
i = 0;
switch (nvpair_type(nvp)) {
case DATA_TYPE_UINT32:
return (EINVAL);
}
break;
case DATA_TYPE_NVLIST_ARRAY:
&nreqs) != 0) {
return (EINVAL);
}
break;
default:
return (EINVAL);
}
i++;
}
/*
* There should be two members in the top-level nvlist:
* an array of nvlists consisting of the requests, and flags.
* Anything else is an invalid set.
*/
if (i != 2) {
return (EINVAL);
}
if (nreqs > CPC_MAX_NREQS) {
return (EINVAL);
}
/*
* The requests are now stored in the nvlist array at reqlist.
* Note that the use of kmem_zalloc() to alloc the kcpc_set_t means
* we don't need to call the init routines for ks_lock and ks_condv.
*/
/*
* If the nvlist didn't contain a flags member, setflags was initialized
* with an illegal value and this set will fail sanity checks later on.
*/
/*
*/
/*
* Build the set up one request at a time, always keeping it self-
* consistent so we can give it to kcpc_free_set() if we need to back
* out and return an error.
*/
for (i = 0; i < nreqs; i++) {
switch (nvpair_type(nvp)) {
case DATA_TYPE_UINT32:
goto inval;
break;
case DATA_TYPE_UINT64:
goto inval;
break;
case DATA_TYPE_STRING:
goto inval;
break;
case DATA_TYPE_NVLIST:
goto inval;
goto inval;
/*
* If the picnum has been specified as an
* attribute, consume that attribute here and
* remove it from the list of attributes.
*/
&uint64) == 0) {
DATA_TYPE_UINT64) != 0)
panic("nvlist %p faulty",
(void *)attrs);
}
/* A zero pair count from kcpc_nvlist_npairs() ends this case early. */
kcpc_nvlist_npairs(attrs)) == 0)
break;
goto inval;
sizeof (kcpc_attr_t), KM_SLEEP);
/* j counts the attributes processed so far. */
j = 0;
if (nvpair_type(nvp_attr) !=
goto inval;
goto inval;
j++;
}
default:
break;
}
}
}
return (0);
return (EINVAL);
}
/*
* Count the number of nvpairs in the supplied nvlist.
*/
static uint32_t
/*
* NOTE(review): the declarator line (function name and parameter list) and
* the loop header that drives the increment below are not present in this
* view.
*
* Returns the tally accumulated in n, which starts at zero.
*/
{
uint32_t n = 0;
n++;
return (n);
}
/*
* Performs sanity checks on the given set.
* Returns 0 if the set checks out OK.
* Returns a detailed error subcode, or -1 if there is no applicable subcode.
*/
static int
/*
* NOTE(review): the declarator line (function name and parameter list), the
* declaration of bitmap, the loop header and every condition guarding the
* returns below are not present in this view.
*
* Visible return values: -1 (no specific subcode), CPC_INVALID_PICNUM,
* CPC_REQ_INVALID_FLAGS, and 0 when nothing is rejected.
*/
{
int i;
int n;
return (-1);
return (-1);
/*
* The following comparison must cast cpc_ncounters to an int,
* because kr_picnum will be -1 if the request didn't explicitly
* choose a PIC.
*/
return (CPC_INVALID_PICNUM);
/*
* Of the pics whose physical picnum has been specified, make
* sure each PIC appears only once in set.
*/
/*
* NOTE(review): "1 << n" shifts an int constant, so it is only well
* defined while n is smaller than the width of int. Presumably an
* earlier range check guarantees that - confirm against the full file.
*/
if ((bitmap & (1 << n)) != 0)
return (-1);
bitmap |= (1 << n);
}
/*
* Make sure the requested index falls within the range of all
* requests.
*/
return (-1);
/*
* Make sure there are no unknown flags.
*/
return (CPC_REQ_INVALID_FLAGS);
}
return (0);
}
nodev, /* strategy */
nodev, /* print */
nodev, /* dump */
nodev, /* read */
nodev, /* write */
nodev, /* devmap */
nodev, /* mmap */
nodev, /* segmap */
nochpoll, /* poll */
NULL,
};
/*ARGSUSED*/
static int
/*
* NOTE(review): the declarator line (function name and parameter list) is
* not present in this view.
*
* Unconditionally reports DDI_PROBE_SUCCESS.
*/
{
return (DDI_PROBE_SUCCESS);
}
static int
/*
* NOTE(review): the declarator line (function name and parameter list) and
* the start of the final return expression are not present in this view.
*
* Any command other than DDI_ATTACH is rejected with DDI_FAILURE. The
* remaining fragment passes KCPC_MINOR_SHARED and DDI_PSEUDO to a call
* whose name is not visible.
*/
{
if (cmd != DDI_ATTACH)
return (DDI_FAILURE);
KCPC_MINOR_SHARED, DDI_PSEUDO, 0));
}
/*ARGSUSED*/
static int
/*
* NOTE(review): the declarator line (function name and parameter list) and
* the inner switch header under DDI_INFO_DEVT2DEVINFO are not present in
* this view.
*
* Dispatches on cmd. DDI_INFO_DEVT2INSTANCE stores 0 through result and
* succeeds. Any path that breaks out of the switch returns DDI_FAILURE.
*/
{
switch (cmd) {
case DDI_INFO_DEVT2DEVINFO:
case KCPC_MINOR_SHARED:
return (DDI_SUCCESS);
default:
break;
}
break;
case DDI_INFO_DEVT2INSTANCE:
/* The instance number reported is always 0. */
*result = 0;
return (DDI_SUCCESS);
default:
break;
}
return (DDI_FAILURE);
}
0,
nulldev, /* identify */
nodev, /* detach */
nodev, /* reset */
&cb_ops,
(struct bus_ops *)0,
NULL,
ddi_quiesce_not_needed, /* quiesce */
};
"cpc sampling driver",
};
5,
};
"cpc sampling system call",
};
#ifdef _SYSCALL32_IMPL
"32-bit cpc sampling system call",
};
#endif
&modldrv,
&modlsys,
#ifdef _SYSCALL32_IMPL
#endif
};
/*
* Module load entry point.
*
* Runs kcpc_init() first; if it reports failure the load is refused with
* ENOTSUP. Otherwise the result of mod_install() on the module linkage is
* returned unchanged.
*/
int
_init(void)
{
	int status;

	status = kcpc_init();
	if (status == 0)
		status = mod_install(&modl);
	else
		status = ENOTSUP;

	return (status);
}
/*
* Module unload entry point.
*
* Hands the module linkage to mod_remove() and returns its result
* unchanged.
*/
int
_fini(void)
{
	int status = mod_remove(&modl);

	return (status);
}
int
/*
* NOTE(review): both the declarator line and the body of this function are
* missing from this view. Given that it follows _init and _fini it is
* presumably the module's _info entry point - confirm against the full
* file before relying on this.
*/
{
}