p123_pcbe.c revision 5d3a5ad8d2a9319e80861563ceff0e6d8d530a32
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Performance Counter Back-End for Pentiums I, II, and III.
*/
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
#include <sys/inttypes.h>
#include <sys/x86_archext.h>
#include <sys/archsystm.h>
#include <sys/privregs.h>
static int ptm_pcbe_init(void);
static uint_t ptm_pcbe_ncounters(void);
static const char *ptm_pcbe_impl_name(void);
static const char *ptm_pcbe_cpuref(void);
static char *ptm_pcbe_list_attrs(void);
static int ptm_pcbe_pic_index(char *picname);
static uint64_t ptm_pcbe_overflow_bitmap(void);
void *token);
static void ptm_pcbe_program(void *token);
static void ptm_pcbe_allstop(void);
static void ptm_pcbe_sample(void *token);
static void ptm_pcbe_free(void *config);
0,
};
typedef enum _ptm_ver {
} ptm_ver_t;
static const char *ptm_impl_name;
static const char *ptm_cpuref;
/*
* Indicates whether the "rdpmc" instruction is available on this processor.
*/
static int ptm_rdpmc_avail = 0;
#define ALL_STOPPED 0ULL
typedef struct _ptm_pcbe_config {
struct nametable {
const char *name;
};
#define NT_END 0xFF
/*
* Basic Pentium events.
*
* P5_EVENTS expands to a comma-separated run of { event code, event name }
* initializers; the final entry carries no trailing comma. The codes are
* not contiguous (0x10-0x11 and 0x20-0x21, for example, do not appear).
* NOTE(review): presumably spliced into struct nametable initializers
* elsewhere in this file -- confirm at the use sites.
*/
#define P5_EVENTS \
{0x0, "data_read"}, \
{0x1, "data_write"}, \
{0x2, "data_tlb_miss"}, \
{0x3, "data_read_miss"}, \
{0x4, "data_write_miss"}, \
{0x5, "write_hit_to_M_or_E"}, \
{0x6, "dcache_lines_wrback"}, \
{0x7, "external_snoops"}, \
{0x8, "external_dcache_snoop_hits"}, \
{0x9, "memory_access_in_both_pipes"}, \
{0xa, "bank_conflicts"}, \
{0xb, "misaligned_ref"}, \
{0xc, "code_read"}, \
{0xd, "code_tlb_miss"}, \
{0xe, "code_cache_miss"}, \
{0xf, "any_segreg_loaded"}, \
{0x12, "branches"}, \
{0x13, "btb_hits"}, \
{0x14, "taken_or_btb_hit"}, \
{0x15, "pipeline_flushes"}, \
{0x16, "instr_exec"}, \
{0x17, "instr_exec_V_pipe"}, \
{0x18, "clks_bus_cycle"}, \
{0x19, "clks_full_wbufs"}, \
{0x1a, "pipe_stall_read"}, \
{0x1b, "stall_on_write_ME"}, \
{0x1c, "locked_bus_cycle"}, \
{0x1d, "io_rw_cycles"}, \
{0x1e, "reads_noncache_mem"}, \
{0x1f, "pipeline_agi_stalls"}, \
{0x22, "flops"}, \
{0x23, "bp_match_dr0"}, \
{0x24, "bp_match_dr1"}, \
{0x25, "bp_match_dr2"}, \
{0x26, "bp_match_dr3"}, \
{0x27, "hw_intrs"}, \
{0x28, "data_rw"}, \
{0x29, "data_rw_miss"}
/*
* Event table covering codes 0x2a through 0x3b, each paired with its event
* name, and closed by an {NT_END, ""} entry. P5mmx_names1 lists the same
* codes under different names.
* NOTE(review): the "mmx" and "0" in the identifier suggest these are the
* Pentium-with-MMX events for the first counter -- confirm against the code
* that selects between the two tables.
*/
static const struct nametable P5mmx_names0[] = {
{0x2a, "bus_ownership_latency"},
{0x2b, "mmx_instr_upipe"},
{0x2c, "cache_M_line_sharing"},
{0x2d, "emms_instr"},
{0x2e, "bus_util_processor"},
{0x2f, "sat_mmx_instr"},
{0x30, "clks_not_HLT"},
{0x31, "mmx_data_read"},
{0x32, "clks_fp_stall"},
{0x33, "d1_starv_fifo_0"},
{0x34, "mmx_data_write"},
{0x35, "pipe_flush_wbp"},
{0x36, "mmx_misalign_data_refs"},
{0x37, "rets_pred_incorrect"},
{0x38, "mmx_multiply_unit_interlock"},
{0x39, "rets"},
{0x3a, "btb_false_entries"},
{0x3b, "clocks_stall_full_wb"},
{NT_END, ""}
};
/*
* Event table covering codes 0x2a through 0x3b, each paired with its event
* name, and closed by an {NT_END, ""} entry. The codes match those in
* P5mmx_names0, but every code maps to a different event name here.
* NOTE(review): the "mmx" and "1" in the identifier suggest these are the
* Pentium-with-MMX events for the second counter -- confirm against the code
* that selects between the two tables.
*/
static const struct nametable P5mmx_names1[] = {
{0x2a, "bus_ownership_transfers"},
{0x2b, "mmx_instr_vpipe"},
{0x2c, "cache_lint_sharing"},
{0x2d, "mmx_fp_transitions"},
{0x2e, "writes_noncache_mem"},
{0x2f, "sats_performed"},
{0x30, "clks_dcache_tlb_miss"},
{0x31, "mmx_data_read_miss"},
{0x32, "taken_br"},
{0x33, "d1_starv_fifo_1"},
{0x34, "mmx_data_write_miss"},
{0x35, "pipe_flush_wbp_wb"},
{0x36, "mmx_pipe_stall_data_read"},
{0x37, "rets_pred"},
{0x38, "movd_movq_stall"},
{0x39, "rsb_overflow"},
{0x3a, "btb_mispred_nt"},
{0x3b, "mmx_stall_write_ME"},
{NT_END, ""}
};
};
/*
* Pentium Pro and Pentium II events
*/
/*
* Data cache unit
*/
{0x43, "data_mem_refs"},
{0x45, "dcu_lines_in"},
{0x46, "dcu_m_lines_in"},
{0x47, "dcu_m_lines_out"},
{0x48, "dcu_miss_outstanding"},
/*
* Instruction fetch unit
*/
{0x80, "ifu_ifetch"},
{0x81, "ifu_ifetch_miss"},
{0x85, "itlb_miss"},
{0x86, "ifu_mem_stall"},
{0x87, "ild_stall"},
/*
* L2 cache
*/
{0x28, "l2_ifetch"},
{0x29, "l2_ld"},
{0x2a, "l2_st"},
{0x24, "l2_lines_in"},
{0x26, "l2_lines_out"},
{0x25, "l2_m_lines_inm"},
{0x27, "l2_m_lines_outm"},
{0x2e, "l2_rqsts"},
{0x21, "l2_ads"},
{0x22, "l2_dbus_busy"},
{0x23, "l2_dbus_busy_rd"},
/*
* External bus logic
*/
{0x62, "bus_drdy_clocks"},
{0x63, "bus_lock_clocks"},
{0x60, "bus_req_outstanding"},
{0x65, "bus_tran_brd"},
{0x66, "bus_tran_rfo"},
{0x67, "bus_trans_wb"},
{0x68, "bus_tran_ifetch"},
{0x69, "bus_tran_inval"},
{0x6a, "bus_tran_pwr"},
{0x6b, "bus_trans_p"},
{0x6c, "bus_trans_io"},
{0x6d, "bus_tran_def"},
{0x6e, "bus_tran_burst"},
{0x70, "bus_tran_any"},
{0x6f, "bus_tran_mem"},
{0x64, "bus_data_rcv"},
{0x61, "bus_bnr_drv"},
{0x7a, "bus_hit_drv"},
{0x7b, "bus_hitm_drv"},
{0x7e, "bus_snoop_stall"},
/*
* Floating point unit
*/
{0xc1, "flops"}, /* 0 only */
{0x10, "fp_comp_ops_exe"}, /* 0 only */
{0x11, "fp_assist"}, /* 1 only */
{0x12, "mul"}, /* 1 only */
{0x13, "div"}, /* 1 only */
{0x14, "cycles_div_busy"}, /* 0 only */
/*
* Memory ordering
*/
{0x3, "ld_blocks"},
{0x4, "sb_drains"},
{0x5, "misalign_mem_ref"},
/*
* Instruction decoding and retirement
*/
{0xc0, "inst_retired"},
{0xc2, "uops_retired"},
{0xd0, "inst_decoder"},
/*
* Interrupts
*/
{0xc8, "hw_int_rx"},
{0xc6, "cycles_int_masked"},
{0xc7, "cycles_int_pending_and_masked"},
/*
* Branches
*/
{0xc4, "br_inst_retired"},
{0xc5, "br_miss_pred_retired"},
{0xc9, "br_taken_retired"},
{0xca, "br_miss_pred_taken_ret"},
{0xe0, "br_inst_decoded"},
{0xe2, "btb_misses"},
{0xe4, "br_bogus"},
{0xe6, "baclears"},
/*
* Stalls
*/
{0xa2, "resource_stalls"},
{0xd2, "partial_rat_stalls"},
/*
* Segment register loads
*/
{0x6, "segment_reg_loads"},
/*
* Clocks
*/
{0x79, "cpu_clk_unhalted"},
/*
* MMX
*/
{0xb0, "mmx_instr_exec"},
{0xb1, "mmx_sat_instr_exec"},
{0xb2, "mmx_uops_exec"},
{0xb3, "mmx_instr_type_exec"},
{0xcc, "fp_mmx_trans"},
{0xcd, "mmx_assists"},
{0xce, "mmx_instr_ret"},
{0xd4, "seg_rename_stalls"},
{0xd5, "seg_reg_renames"},
{0xd6, "ret_seg_renames"},
{NT_END, ""}
};
};
/*
 * Extract the inclusive bit field [u:l] of v (0 <= l <= u <= 63), shifted
 * down so that bit l of v becomes bit 0 of the result.
 *
 * The mask is produced by shifting ~0ULL right rather than by shifting an
 * int 1 left: "1 << width" is undefined once the field is 31 or more bits
 * wide, which matters for the 40-bit counter fields this file deals with.
 * As a consequence the result is evaluated in (at least) unsigned long long.
 * v is evaluated once; l is evaluated twice, so avoid side effects there.
 */
#define BITS(v, u, l) \
	(((v) >> (l)) & (~0ULL >> (63 - ((u) - (l)))))
/*
* "Well known" bit fields in the Pentium CES register
* The interfaces in libcpc should make these #defines uninteresting.
*/
#define CPC_P5_CESR_ES0_SHIFT 0
#define CPC_P5_CESR_ES0_MASK 0x3f
#define CPC_P5_CESR_ES1_SHIFT 16
#define CPC_P5_CESR_ES1_MASK 0x3f
#define CPC_P5_CESR_OS0 6
#define CPC_P5_CESR_USR0 7
#define CPC_P5_CESR_CLK0 8
#define CPC_P5_CESR_PC0 9
/*
* "Well known" bit fields in the Pentium Pro PerfEvtSel registers
* The interfaces in libcpc should make these #defines uninteresting.
*/
#define CPC_P6_PES_INV 23
#define CPC_P6_PES_EN 22
#define CPC_P6_PES_INT 20
#define CPC_P6_PES_PC 19
#define CPC_P6_PES_E 18
#define CPC_P6_PES_OS 17
#define CPC_P6_PES_USR 16
#define CPC_P6_PES_UMASK_SHIFT 8
#define CPC_P6_PES_UMASK_MASK (0xffu)
#define CPC_P6_PES_CMASK_SHIFT 24
#define CPC_P6_PES_CMASK_MASK (0xffu)
#define CPC_P6_PES_PIC0_MASK (0xffu)
#define CPC_P6_PES_PIC1_MASK (0xffu)
/*
* Pentium 5 attributes
*/
/*
* Pentium 6 attributes
*/
#define P6_NOEDGE 0x1
#define P6_PC 0x2
/*
* CPU reference strings
*/
#define P5_CPUREF "See Appendix A.4 of the \"IA-32 Intel Architecture " \
"Software Developer's Manual Volume 3: System " \
"Programming Guide,\" Order # 245472-012, 2003"
#define P6_CPUREF "See Appendix A.3 of the \"IA-32 Intel Architecture " \
"Software Developer's Manual Volume 3: System " \
"Programming Guide,\" Order # 245472-012, 2003"
static int
ptm_pcbe_init(void)
{
const struct nametable *n;
int i;
if (x86_feature & X86_MMX)
ptm_rdpmc_avail = 1;
/*
* Discover type of CPU and set events pointer appropriately.
*
* Map family and model into the performance
* counter architectures we currently understand.
*
* See application note AP485 (from developer.intel.com)
* for further explanation.
*/
return (-1);
switch (cpuid_getfamily(CPU)) {
case 5: /* Pentium and Pentium with MMX */
ptm_impl_name = "Pentium";
else
ptm_impl_name = "Pentium with MMX";
break;
case 6: /* Pentium Pro and Pentium II and III */
if (x86_feature & X86_MMX)
ptm_impl_name = "Pentium Pro with MMX, Pentium II";
else
ptm_impl_name = "Pentium Pro, Pentium II";
break;
default:
return (-1);
}
/*
* Initialize the list of events for each PIC.
* Do two passes: one to compute the size necessary and another
* to copy the strings. Need room for event, comma, and NUL terminator.
*/
for (i = 0; i < 2; i++) {
size = 0;
*pic_events[i] = '\0';
}
/*
* Remove trailing comma.
*/
}
return (0);
}
/*
 * Report the number of performance counters this backend exposes.
 * The value is fixed at 2.
 */
static uint_t
ptm_pcbe_ncounters(void)
{
	const uint_t counter_count = 2;

	return (counter_count);
}
/*
 * Return the implementation name string currently held in ptm_impl_name
 * (assigned by ptm_pcbe_init()).
 */
static const char *
ptm_pcbe_impl_name(void)
{
	const char *name = ptm_impl_name;

	return (name);
}
/*
 * Return the CPU reference string currently held in ptm_cpuref.
 */
static const char *
ptm_pcbe_cpuref(void)
{
	const char *ref = ptm_cpuref;

	return (ref);
}
static char *
{
if (pic_events[0] == NULL) {
}
return (pic_events[picnum]);
}
/*
 * Return the comma-separated attribute list for the detected processor
 * version: "noedge,pc" when ptm_ver is PTM_VER_P5, otherwise the longer
 * list that also names inv, int, umask and cmask.
 */
static char *
ptm_pcbe_list_attrs(void)
{
	return ((ptm_ver == PTM_VER_P5) ?
	    "noedge,pc" : "noedge,pc,inv,int,umask,cmask");
}
static const struct nametable *
{
const struct nametable *n;
return (n);
return (NULL);
}
static uint64_t
ptm_pcbe_event_coverage(char *event)
{
bitmap = 0x1;
bitmap |= 0x2;
return (bitmap);
}
static uint64_t
ptm_pcbe_overflow_bitmap(void)
{
/*
* P5 is not capable of generating interrupts.
*/
/*
* CPC could have caused an interrupt provided that
*
* 1) Counters are enabled
* 2) Either counter has requested an interrupt
*/
return (0);
/*
* If a particular counter requested an interrupt, assume it caused
* this interrupt. There is no way to determine which counter overflowed
* on this hardware other than by using unreliable heuristics.
*/
ret |= 0x1;
ret |= 0x2;
return (ret);
}
/*ARGSUSED*/
static int
void *token)
{
const struct nametable *n;
int i;
int ptm_flags = 0;
/*
* If we've been handed an existing configuration, we need only preset
* the counter value.
*/
return (0);
}
return (CPC_INVALID_PICNUM);
long tmp;
/*
* If ddi_strtol() likes this event, use it as a raw event code.
*/
return (CPC_INVALID_EVENT);
if (ptm_ver == PTM_VER_P5)
else
n = &nt_raw;
}
if (ptm_ver == PTM_VER_P5) {
int picshift;
for (i = 0; i < nattrs; i++) {
/*
* Value of these attributes is ignored; their presence
* alone tells us to set the corresponding flag.
*/
} else {
return (CPC_INVALID_ATTRIBUTE);
}
}
if (flags & CPC_COUNT_USER)
if (flags & CPC_COUNT_SYSTEM)
} else {
for (i = 0; i < nattrs; i++) {
sizeof (ptm_pcbe_config_t));
return (CPC_ATTRIBUTE_OUT_OF_RANGE);
}
sizeof (ptm_pcbe_config_t));
return (CPC_ATTRIBUTE_OUT_OF_RANGE);
}
} else {
return (CPC_INVALID_ATTRIBUTE);
}
}
if (flags & CPC_OVF_NOTIFY_EMT)
/*
* If the user has requested notification of overflows,
* we automatically program the hardware to generate
* overflow interrupts.
*/
if (flags & CPC_COUNT_USER)
if (flags & CPC_COUNT_SYSTEM)
}
return (0);
}
static void
ptm_pcbe_program(void *token)
{
}
if (ptm_rdpmc_avail) {
if (kcpc_allow_nonpriv(token))
else
}
if (ptm_ver == PTM_VER_P5) {
} else {
}
}
static void
ptm_pcbe_allstop(void)
{
if (ptm_ver == PTM_VER_P5)
else {
}
}
static void
ptm_pcbe_sample(void *token)
{
}
}
if (ptm_ver == PTM_VER_P5) {
} else {
}
}
static void
ptm_pcbe_free(void *config)
{
}
/*
* Virtualizes the 40-bit field of the %pic
* register into a 64-bit software register.
*
* We can retrieve 40 (signed) bits from the counters,
* but we can set only 32 (signed) bits into the counters.
* This makes virtualizing more than 31 bits of the register
* quite tricky.
*
* If bits 39 to 31 are set in the virtualized pic register,
* then we can preset the counter to this value using the fact
* that wrmsr sign extends bit 31. Though it might look easier
* to only use the bottom 31-bits of the register, we have to allow
* the full 40-bits to be used to perform overflow profiling.
*/
static int64_t
{
if (diff < 0)
} else {
if (diff < 0)
}
return (diff);
}
static uint64_t
{
}
"Pentium Performance Counters v%I%",
};
static struct modlinkage modl = {
&modlpcbe,
};
/*
 * Run ptm_pcbe_init(); when it reports success (0), hand the module
 * linkage to mod_install() and return its result. Any other outcome
 * yields ENOTSUP without attempting the install.
 */
int
_init(void)
{
	if (ptm_pcbe_init() == 0)
		return (mod_install(&modl));

	return (ENOTSUP);
}
/*
 * Pass the module linkage to mod_remove() and return whatever it reports.
 */
int
_fini(void)
{
	int status = mod_remove(&modl);

	return (status);
}
int
{
}