fbt.c revision a1b5e537933659371285214eae1db2603e6364b4
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <vm/seg_kmem.h>
#include <sys/sysmacros.h>
/*
 * File-scope state for the FBT provider.
 *
 * fbt_id is the provider id handed to dtrace_unregister() on detach.
 * fbt_trampoline_size is asserted to be zero on one path and non-zero
 * on another, where it is reset to zero after a membar_enter().
 * NOTE(review): this looks like a "trampoline window currently open"
 * indicator guarding fbt_trampoline / fbt_trampoline_window -- confirm
 * against the full definitions of the routines that use it.
 * fbt_verbose gates the diagnostic messages emitted when a probe
 * cannot be enabled.
 */
static dev_info_t *fbt_devi;
static dtrace_provider_id_t fbt_id;
static uintptr_t fbt_trampoline;
static caddr_t fbt_trampoline_window;
static size_t fbt_trampoline_size;
static int fbt_verbose = 0;
/*
 * Various interesting bean counters.
 *
 * NOTE(review): the sites that increment these are not visible in this
 * view. Going by the names, they presumably tally entry probes, ret and
 * retl return sites, the retl sub-cases (jump table, two-instruction,
 * tail call, tail jmpl) and leaf functions -- confirm before relying on
 * any of them.
 */
static int fbt_entry;
static int fbt_ret;
static int fbt_retl;
static int fbt_retl_jmptab;
static int fbt_retl_twoinstr;
static int fbt_retl_tailcall;
static int fbt_retl_tailjmpl;
static int fbt_leaf_functions;
extern char stubs_base[];
extern char stubs_end[];
/*
 * Numeric register identifiers, grouped by name prefix: G (0-1),
 * O (8-15), L (16-19) and I (24-28, 31). The code in this file assigns
 * and compares them as the rs1/rs2/rd values of instructions.
 * NOTE(review): the values match the SPARC integer register numbering
 * (%g, %o, %l, %i) -- confirm.
 * NOTE(review): FBT_REG_PC is 5, which would otherwise fall in the
 * global range; presumably it lives in a different register space
 * (e.g. the operand of "rd %pc") -- confirm.
 */
#define FBT_REG_G0 0
#define FBT_REG_G1 1
#define FBT_REG_O0 8
#define FBT_REG_O1 9
#define FBT_REG_O2 10
#define FBT_REG_O3 11
#define FBT_REG_O4 12
#define FBT_REG_O5 13
#define FBT_REG_O6 14
#define FBT_REG_O7 15
#define FBT_REG_I0 24
#define FBT_REG_I1 25
#define FBT_REG_I2 26
#define FBT_REG_I3 27
#define FBT_REG_I4 28
#define FBT_REG_I7 31
#define FBT_REG_L0 16
#define FBT_REG_L1 17
#define FBT_REG_L2 18
#define FBT_REG_L3 19
#define FBT_REG_PC 5
/* True when r is numbered below 8, i.e. below the first O register. */
#define FBT_REG_ISGLOBAL(r) ((r) < 8)
#define FBT_REG_ISVOLATILE(r) \
#define FBT_REG_NLOCALS 8
#define FBT_REG_MARKLOCAL(locals, r) \
if (FBT_REG_ISLOCAL(r)) \
(local) = FBT_REG_L0
(local)++; \
/*
 * Masks and shift counts for fields of a 32-bit instruction word.
 * FBT_OP_MASK / FBT_OP_SHIFT select the top two bits. The remaining
 * masks are named for the width of the field they cover: 13-bit signed
 * immediate, 22- and 10-bit immediates, and 30-, 22- and 19-bit
 * displacements. FBT_DISP16_HISHIFT and FBT_DISP16_LOMASK describe the
 * two pieces of a split 16-bit displacement (low piece is 14 bits).
 */
#define FBT_OP_MASK 0xc0000000
#define FBT_OP_SHIFT 30
#define FBT_SIMM13_MASK 0x1fff
#define FBT_IMM22_MASK 0x3fffff
#define FBT_IMM22_SHIFT 10
#define FBT_IMM10_MASK 0x3ff
#define FBT_DISP30_MASK 0x3fffffff
#define FBT_DISP22_MASK 0x3fffff
#define FBT_DISP19_MASK 0x7ffff
#define FBT_DISP16_HISHIFT 20
#define FBT_DISP16_LOMASK (0x3fff)
#define FBT_DISP16(val) \
#define FBT_DISP14_MASK 0x3fff
#define FBT_ILLTRAP 0
#define FBT_ANNUL_SHIFT 29
#define FBT_FMT3_OP3_SHIFT 19
#define FBT_FMT3_OP_MASK 0xc1f80000
#define FBT_FMT3_RD_SHIFT 25
#define FBT_FMT3_RD(val) \
#define FBT_FMT3_RS1_SHIFT 14
#define FBT_FMT3_RS1(val) \
#define FBT_FMT3_RS2_SHIFT 0
#define FBT_FMT3_RS2(val) \
#define FBT_FMT3_IMM_SHIFT 13
#define FBT_FMT3_SIMM13_MASK FBT_SIMM13_MASK
#define FBT_FMT2_OP2_SHIFT 22
#define FBT_FMT2_RD_SHIFT 25
#define FBT_FMT2_COND_SHIFT 25
#define FBT_OP_CALL FBT_OP1
#define FBT_RET \
/*
* We're only going to treat a save as safe if (a) both rs1 and rd are
* %sp and (b) if the instruction has a simm, the value isn't 0.
*/
#define FBT_IS_SAVE(instr) \
#define FBT_IS_PCRELATIVE(instr) \
#define FBT_IS_CTI(instr) \
#define FBT_PROBENAME_ENTRY "entry"
#define FBT_PROBENAME_RETURN "return"
#define FBT_ESTIMATE_ID (UINT32_MAX)
#define FBT_ENT_MAXSIZE \
typedef struct fbt_probe {
char *fbtp_name;
int fbtp_loadcnt;
int fbtp_symndx;
int fbtp_primary;
int fbtp_return;
} fbt_probe_t;
typedef struct fbt_trampoline {
static caddr_t
{
ASSERT(fbt_trampoline_size == 0);
}
S_WRITE);
}
static void
{
ASSERT(fbt_trampoline_size != 0);
membar_enter();
fbt_trampoline_size = 0;
}
static uintptr_t
int nargs)
{
/*
* There isn't sufficient room for this entry; return failure.
*/
return (0);
}
if (FBT_IS_SAVE(first)) {
} else {
}
} else {
}
if (nargs >= 1)
if (nargs >= 2)
if (nargs >= 3)
if (nargs >= 4)
if (nargs >= 5)
if (FBT_IS_SAVE(first)) {
tinstr++;
} else {
tinstr++;
/*
* This is a special case: we are instrumenting a
* non-annulled branch-always (or variant). We'll
* return directly to the destination of the branch,
* copying the instruction in the delay slot here,
* and then executing it in the slot of a ba.
*/
} else {
}
}
tinstr++;
} else {
/*
* If this is a branch-on-register, we have a little
* more work to do: because the displacement is only
* sixteen bits, we're going to thunk the branch into
* the trampoline, and then ba,a to the appropriate
* destination in the branch targets. That is, we're
* constructing this sequence in the trampoline:
*
* br[cc] %[rs], 1f
* <delay-instruction>
* ba,a <not-taken-destination>
* 1: ba,a <taken-destination>
*
*/
tinstr++;
tinstr++;
tinstr++;
}
}
return (1);
}
/*
* We are patching control-transfer/restore couplets. There are three
* variants of couplet:
*
* (a) return rs1 + imm
* delay
*
* (b) jmpl rs1 + (rs2 | offset), rd
* restore rs1, rs2 | imm, rd
*
* (c) call displacement
* restore rs1, rs2 | imm, rd
*
* If rs1 in (a) is anything other than %i7, or imm is anything other than 8,
* or delay is a DCTI, we fail. If rd from the jmpl in (b) is something other
* than %g0 (a ret or a tail-call through a function pointer) or %o7 (a call
* through a register), we fail.
*
* Note that rs1 and rs2 in the restore instructions in (b) and (c) are
* potentially outputs and/or globals. Because these registers cannot be
* relied upon across the call to dtrace_probe(), we move rs1 into an unused
* local, ls0, and rs2 into an unused local, ls1, and restructure the restore
* to be:
*
* restore ls0, ls1, rd
*
* If the jmpl uses outputs or globals, we restructure it to be:
*
* jmpl ls2 + (ls3 | offset), (%g0 | %o7)
*
*/
/*ARGSUSED*/
static int
{
int rd;
/*
* It's unclear if we should warn about this or not.
* We really wouldn't expect the compiler to generate
* return instructions with something other than %i7
* as rs1 and 8 as the simm13 -- it would just be
* mean-spirited. That said, such a construct isn't
* necessarily incorrect. Still, we err on the side of
* caution and warn about it...
*/
"%p: non-canonical return instruction", name,
(void *)instr);
return (0);
}
if (FBT_IS_CTI(delay)) {
/*
* This is even weirder -- a DCTI coupled with a
* return instruction. Similar constructs are used to
* return from utraps, but these typically have the
* return in the slot -- and we wouldn't expect to see
* it in the kernel regardless. At any rate, we don't
* want to try to instrument this construct, whatever
* it may be.
*/
"%p: CTI in delay slot of return instruction",
return (0);
}
if (FBT_IS_PCRELATIVE(delay)) {
/*
* This is also very weird, but might be correct code
* if the function is (for example) returning the
* address of the delay instruction of the return as
* its return value (e.g. "rd %pc, %o0" in the slot).
* Perhaps correct, but still too weird to not warn
* about it...
*/
"%p: PC-relative instruction in delay slot of "
return (0);
}
return (1);
}
return (0);
return (1);
return (0);
return (1);
/*
* We have encountered a jmpl that is storing the calling %pc in
* some register besides %i7, %o7 or %g0. This is strange; emit
* a warning and fail.
*/
return (0);
}
static int
{
/*
* If this is a call (or a jmpl that links into %o7), we can
* patch it iff the next instruction uses %o7 as a destination
* register. Because there is an ABI responsibility to
* restore %o7 to the value before the call/jmpl, we don't
* particularly care how this routine is managing to restore
* it (mov, add, ld or divx for all we care). If it doesn't
* seem to be restoring it at all, however, we'll refuse
* to patch it.
*/
/*
* This is odd. Before we assume that we're looking
* at something bizarre (and warn accordingly), we'll
* check to see if it's obviously a jump table entry.
*/
return (0);
return (0);
}
return (1);
}
/*
* If this is the second instruction in the function, we're
* going to allow it to be patched if the first instruction
* is a patchable return-from-leaf instruction.
*/
return (1);
}
return (0);
return (0);
return (1);
}
/*ARGSUSED*/
static uint32_t
{
/*
* There isn't sufficient room for this entry; return failure.
*/
return (FBT_ILLTRAP);
}
/*
* To handle the case of the return instruction, we'll emit a
* restore, followed by the instruction in the slot (which
* we'll transplant here), and then another save. While it
* may seem intellectually unsatisfying to emit the additional
* restore/save couplet, one can take solace in the fact that
* we don't do this if the instruction in the return delay
* slot is a nop -- which it is nearly 90% of the time with
* gcc. (And besides, this couplet can't induce unnecessary
* spill/fill traps; rewriting the delay instruction to be
* in terms of the current window hardly seems worth the
* trouble -- let alone the risk.)
*/
}
}
/*
* Mark the locals used in the jmpl.
*/
if (!FBT_FMT3_ISIMM(cti)) {
}
}
/*
* And mark the locals used in the restore.
*/
if (!FBT_FMT3_ISIMM(restore)) {
}
if (FBT_REG_ISVOLATILE(rs1)) {
}
if (!FBT_FMT3_ISIMM(cti)) {
if (FBT_REG_ISVOLATILE(rs2)) {
}
}
}
if (FBT_REG_ISVOLATILE(rs1)) {
}
if (!FBT_FMT3_ISIMM(restore)) {
if (FBT_REG_ISVOLATILE(rs2)) {
}
}
} else {
}
} else {
}
tinstr++;
/*
* If the destination register of the restore is %o0, we
* need to perform the implied calculation to derive the
* return value.
*/
add &= ~FBT_FMT3_RD_MASK;
} else {
}
/*
* If the control transfer instruction is %pc-relative (i.e. a
* call), we need to reset it appropriately.
*/
tinstr++;
} else {
}
}
static uint32_t
{
int annul = 0;
/*
* There isn't sufficient room for this entry; return failure.
*/
return (FBT_ILLTRAP);
}
annul = 1;
} else {
annul = 1;
}
}
/*
* If we have a jmpl and it's in terms of output registers, we
* need to rewrite it to be in terms of the corresponding input
* registers. If it's in terms of the globals, we'll rewrite
* it to be in terms of locals.
*/
if (FBT_REG_ISOUTPUT(rs1))
if (FBT_REG_ISGLOBAL(rs1)) {
rs1 = FBT_REG_L0;
}
if (!FBT_FMT3_ISIMM(cti)) {
if (FBT_REG_ISOUTPUT(rs2))
if (FBT_REG_ISGLOBAL(rs2)) {
rs2 = FBT_REG_L1;
}
}
/*
* Now we need to check the rd and source register for the jmpl;
* If neither rd nor the source register is %o7, then we might
* have a jmp that is actually part of a jump table. We need
* to generate the code to compare it to the base and limit of
* the function.
*/
if (FBT_FMT3_ISIMM(cti)) {
} else {
}
}
}
} else {
}
} else {
}
tinstr++;
/*
* If the control transfer instruction is %pc-relative (i.e. a
* call), we need to reset it appropriately.
*/
tinstr++;
annul = 1;
} else {
annul = 1;
}
} else {
}
}
}
/*ARGSUSED*/
static void
{
char *name;
int i;
int primary = 0;
int error;
int estimate = 1;
/*
* Employees of dtrace and their families are ineligible. Void
* where prohibited.
*/
return;
struct modctl_list *list;
return;
}
}
/*
* KMDB is ineligible for instrumentation -- it may execute in
* any context, including probe context.
*/
return;
/*
* If this module doesn't (yet) have its string or symbol
* table allocated, clear out.
*/
return;
}
if (mp->fbt_nentries) {
/*
* This module has some FBT entries allocated; we're afraid
* to screw with it.
*/
return;
}
estimate = 0;
/*
*/
sizeof (struct modctl)) == 0;
/*
* Open the CTF data for the module. We'll use this to determine the
* functions that can be instrumented. Note that this call can fail,
* in which case we'll use heuristics to determine the functions that
* can be instrumented. (But in particular, leaf functions will not be
* instrumented.)
*/
if (!estimate) {
}
for (i = 1; i < nsyms; i++) {
uint32_t, fbt_trampoline_t *, const char *);
continue;
/*
* Weak symbols are not candidates. This could be made to
* work (where weak functions and their underlying function
* appear as two disjoint probes), but it's not simple.
*/
continue;
/*
* Anything beginning with "dtrace_" may be called
* from probe context unless it explicitly indicates
* that it won't be called from probe context by
* using the prefix "dtrace_safe_".
*/
continue;
}
/*
* Any function name beginning with "kdi_" or
* containing the string "_kdi_" is a part of the
* kernel debugger interface and may be called in
* arbitrary context -- including probe context.
*/
continue;
}
/*
* Anything with the string "__relocatable" anywhere
* in the function name is considered to be a function
* that may be manually relocated before execution.
* Because FBT uses a PC-relative technique for
* instrumentation, these functions cannot safely
* be instrumented by us.
*/
continue;
}
/*
* The ip_ocsum_* family of routines are all ABI
* violators. (They expect incoming arguments in the
* globals!) Break the ABI? No soup for you!
*/
continue;
}
/*
* We want to scan the function for one (and only one) save.
* Any more indicates that something fancy is going on.
*/
/*
* We don't want to interpose on the module stubs.
*/
continue;
/*
* We can't safely trace a zero-length function...
*/
continue;
/*
* Due to 4524008, _init and _fini may have a bloated st_size.
* While this bug was fixed quite some time ago, old drivers
* may be lurking. We need to develop a better solution to
* this problem, such that correct _init and _fini functions
* (the vast majority) may be correctly traced. One solution
* may be to scan through the entire symbol table to see if
* any symbol overlaps with _init. If none does, set a bit in
* the module structure that this module has correct _init and
* _fini sizes. This will cause some pain the first time a
* module is scanned, but at least it would be O(N) instead of
* O(N log N)...
*/
continue;
continue;
/*
* While we try hard to only trace safe functions (that is,
* functions at TL=0), one unsafe function manages to otherwise
* appear safe: prom_trap(). We could discover prom_trap()
* if we added an additional rule: in order to trace a
* function, we must either (a) discover a restore or (b)
* determine that the function does not have any unlinked
* control transfers to another function (i.e., the function
* never returns). Unfortunately, as of this writing, one
* legitimate function (resume_from_zombie()) transfers
* control to a different function (_resume_from_idle())
* without executing a restore. Barring a rule to figure out
* that resume_from_zombie() is safe while prom_trap() is not,
* we resort to hard-coding prom_trap() here.
*/
continue;
have_ctf = 1;
} else {
nargs = 32;
}
/*
* If the first instruction of the function is a branch and
* it's not a branch-always-not-annulled, we're going to refuse
* to patch it.
*/
if (have_ctf) {
" begins with non-ba, "
"non-br CTI", name);
}
continue;
}
}
while (!FBT_IS_SAVE(*instr)) {
/*
* Before we assume that this is a leaf routine, check
* forward in the basic block for a save.
*/
/*
* This is a CTI. If we see a subsequent
* save, we will refuse to process this
* routine unless both of the following are
* true:
*
* (a) The branch is not annulled
*
* (b) The subsequent save is in the delay
* slot of the branch
*/
cti = 1;
} else {
instr++;
break;
}
}
cti = 1;
break;
}
/*
* If we found a CTI before the save, we need to not
* do anything. But if we have CTF information, this
* is weird enough that it merits a message.
*/
if (!have_ctf)
continue;
"save not in first basic block", name);
continue;
}
if (!have_ctf)
continue;
is_leaf = 1;
if (!estimate)
} else {
}
/*
* Before we assume that this isn't something tricky,
* look for other saves. If we find them, there are
* multiple entry points here (or something), and we'll
* leave it alone.
*/
if (FBT_IS_SAVE(*instr))
break;
}
continue;
}
if (FBT_IS_CTI(*instr)) {
/*
* If we have a CTI, we want to be sure that we don't
* have a CTI or a PC-relative instruction in the
* delay slot -- we want to be able to thunk the
* instruction into the trampoline without worrying
* about either DCTIs or relocations. It would be
* very odd for the compiler to generate this kind of
* code, so we warn about it if we have CTF
* information.
*/
if (!have_ctf)
continue;
"CTI in delay slot of first instruction",
name);
continue;
}
if (!have_ctf)
continue;
"PC-relative instruction in delay slot of"
" first instruction", name);
continue;
}
}
if (estimate) {
} else {
"in module %s (sym %d of %d)", modname,
i, nsyms);
break;
}
fbt->fbtp_symndx = i;
mp->fbt_nentries++;
}
continue;
goto again;
if (estimate) {
goto again;
}
} else {
}
break;
}
fbt->fbtp_symndx = i;
mp->fbt_nentries++;
goto again;
}
if (estimate) {
/*
* Slosh on another entry's worth...
*/
"for module %s", modname);
} else {
estimate = 0;
goto forreal;
}
} else {
}
}
/*ARGSUSED*/
static void
{
do {
((struct module *)
}
}
}
/*ARGSUSED*/
static void
{
ctl->mod_nenabled++;
if (f->fbtp_patchpoint == NULL) {
/*
* Due to a shortened FBT table, this entry was never
* completed; refuse to enable it.
*/
if (fbt_verbose) {
"(short FBT table in %s)",
}
return;
}
}
/*
* If this module has disappeared since we discovered its probes,
* refuse to enable it.
*/
if (fbt_verbose) {
"(module %s unloaded)",
}
return;
}
/*
* Now check that our modctl has the expected load count. If it
* doesn't, this module must have been unloaded and reloaded -- and
* we're not going to touch it.
*/
if (fbt_verbose) {
"(module %s reloaded)",
}
return;
}
}
/*ARGSUSED*/
static void
{
ctl->mod_nenabled--;
if (f->fbtp_patchpoint == NULL)
return;
}
return;
}
/*ARGSUSED*/
static void
{
return;
return;
}
/*ARGSUSED*/
static void
{
return;
return;
}
/*ARGSUSED*/
static void
{
int error;
const char *parent;
goto err;
return;
}
/*
* We have no CTF information for this module -- and therefore
* no args[] information.
*/
goto err;
}
/*
* If we have a parent container, we must manually import it.
*/
/*
* We must iterate over all modules to find the module that
* is our parent.
*/
break;
}
goto err;
goto err;
goto err;
}
}
goto err;
if (fbt->fbtp_return) {
goto err;
type = f.ctc_return;
} else {
goto err;
goto err;
}
DTRACE_ARGTYPELEN) != NULL) {
return;
}
err:
}
static dtrace_pattr_t fbt_attr = {
};
static dtrace_pops_t fbt_pops = {
NULL,
NULL,
NULL,
};
static int
{
switch (cmd) {
case DDI_ATTACH:
break;
case DDI_RESUME:
return (DDI_SUCCESS);
default:
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
static int
{
switch (cmd) {
case DDI_DETACH:
break;
case DDI_SUSPEND:
return (DDI_SUCCESS);
default:
return (DDI_FAILURE);
}
if (dtrace_unregister(fbt_id) != 0)
return (DDI_FAILURE);
return (DDI_SUCCESS);
}
/*ARGSUSED*/
static int
{
int error;
switch (infocmd) {
case DDI_INFO_DEVT2DEVINFO:
error = DDI_SUCCESS;
break;
case DDI_INFO_DEVT2INSTANCE:
*result = (void *)0;
error = DDI_SUCCESS;
break;
default:
error = DDI_FAILURE;
}
return (error);
}
/*ARGSUSED*/
static int
{
return (0);
}
/*
 * Character/block entry-point table for the fbt pseudo driver. The
 * initialiser is positional; the trailing comment on each line names
 * the slot being filled. Only the open slot points at a routine defined
 * in this file (fbt_open); every other slot is filled with a generic
 * stub (nodev, nulldev, nochpoll) or ddi_prop_op.
 */
static struct cb_ops fbt_cb_ops = {
fbt_open, /* open */
nodev, /* close */
nulldev, /* strategy */
nulldev, /* print */
nodev, /* dump */
nodev, /* read */
nodev, /* write */
nodev, /* ioctl */
nodev, /* devmap */
nodev, /* mmap */
nodev, /* segmap */
nochpoll, /* poll */
ddi_prop_op, /* cb_prop_op */
0, /* streamtab */
};
DEVO_REV, /* devo_rev */
0, /* refcnt */
fbt_info, /* get_dev_info */
nulldev, /* identify */
nulldev, /* probe */
fbt_attach, /* attach */
fbt_detach, /* detach */
nodev, /* reset */
&fbt_cb_ops, /* driver operations */
NULL, /* bus operations */
nodev /* dev power */
};
/*
* Module linkage information for the kernel.
*/
&mod_driverops, /* module type (this is a pseudo driver) */
"Function Boundary Tracing", /* name of module */
&fbt_ops, /* driver ops */
};
/*
 * Linkage structure passed to mod_install() by _init() and to
 * mod_remove() by _fini(); it points at this module's modldrv.
 */
static struct modlinkage modlinkage = {
(void *)&modldrv,
};
/*
 * Module load entry point: register this module's linkage with
 * mod_install() and hand its status back unchanged.
 */
int
_init(void)
{
	int status;

	status = mod_install(&modlinkage);
	return (status);
}
int
{
}
/*
 * Module unload entry point: withdraw this module's linkage via
 * mod_remove() and hand its status back unchanged.
 */
int
_fini(void)
{
	int status;

	status = mod_remove(&modlinkage);
	return (status);
}