2N/A/*
2N/A * CDDL HEADER START
2N/A *
2N/A * The contents of this file are subject to the terms of the
2N/A * Common Development and Distribution License, Version 1.0 only
2N/A * (the "License"). You may not use this file except in compliance
2N/A * with the License.
2N/A *
2N/A * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
2N/A * or http://www.opensolaris.org/os/licensing.
2N/A * See the License for the specific language governing permissions
2N/A * and limitations under the License.
2N/A *
2N/A * When distributing Covered Code, include this CDDL HEADER in each
2N/A * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
2N/A * If applicable, add the following below this CDDL HEADER, with the
2N/A * fields enclosed by brackets "[]" replaced with your own identifying
2N/A * information: Portions Copyright [yyyy] [name of copyright owner]
2N/A *
2N/A * CDDL HEADER END
2N/A */
2N/A/*
2N/A * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
2N/A * Use is subject to license terms.
2N/A */
2N/A
2N/A#pragma ident "%Z%%M% %I% %E% SMI"
2N/A
2N/A#include <sys/types.h>
2N/A#include <string.h>
2N/A#include <alloca.h>
2N/A#include <stdlib.h>
2N/A#include <stdio.h>
2N/A#include <libintl.h>
2N/A
2N/A#include "libcpc.h"
2N/A#include "libcpc_impl.h"
2N/A
/*
 * Configuration data for Pentium and Pentium Pro family performance
 * counters.
 *
 * Definitions taken from [3].  See the reference to
 * understand what any of these settings actually mean.
 *
 * [3]	"Pentium Pro Family Developer's Manual, Volume 3:
 *	Operating Systems Writer's Manual," January 1996
 */
2N/A
/*
 * Capability flag bits.  Event-table entries carry the flag they need;
 * a cpu version maps to the set of flags it provides.  V_END is not a
 * capability: it marks the last entry of an event table.
 */
#define	V_END		0
#define	V_P5		0x1u	/* Pentium family events */
#define	V_P5mmx		0x2u	/* Pentium events needing MMX */
#define	V_P6		0x4u	/* Pentium Pro / Pentium II family events */
#define	V_P6mmx		0x8u	/* Pentium Pro / II events needing MMX */
2N/A
2N/A/*
2N/A * map from "cpu version" to flag bits
2N/A */
2N/Astatic const uint_t cpuvermap[] = {
2N/A V_P5, /* CPC_PENTIUM */
2N/A V_P5 | V_P5mmx, /* CPC_PENTIUM_MMX */
2N/A V_P6, /* CPC_PENTIUM_PRO */
2N/A V_P6 | V_P6mmx, /* CPC_PENTIUM_PRO_MMX */
2N/A};
2N/A
2N/Astruct nametable {
2N/A const uint_t ver;
2N/A const uint8_t bits;
2N/A const char *name;
2N/A};
2N/A
/*
 * Basic Pentium events.
 *
 * Expands to a comma-separated list of nametable initializers (with no
 * trailing comma), each tagged with the capability flag "v".
 */
#define	P5_EVENTS(v)						\
	{(v), 0x00, "data_read"},				\
	{(v), 0x01, "data_write"},				\
	{(v), 0x02, "data_tlb_miss"},				\
	{(v), 0x03, "data_read_miss"},				\
	{(v), 0x04, "data_write_miss"},				\
	{(v), 0x05, "write_hit_to_M_or_E"},			\
	{(v), 0x06, "dcache_lines_wrback"},			\
	{(v), 0x07, "external_snoops"},				\
	{(v), 0x08, "external_dcache_snoop_hits"},		\
	{(v), 0x09, "memory_access_in_both_pipes"},		\
	{(v), 0x0a, "bank_conflicts"},				\
	{(v), 0x0b, "misaligned_ref"},				\
	{(v), 0x0c, "code_read"},				\
	{(v), 0x0d, "code_tlb_miss"},				\
	{(v), 0x0e, "code_cache_miss"},				\
	{(v), 0x0f, "any_segreg_loaded"},			\
	{(v), 0x12, "branches"},				\
	{(v), 0x13, "btb_hits"},				\
	{(v), 0x14, "taken_or_btb_hit"},			\
	{(v), 0x15, "pipeline_flushes"},			\
	{(v), 0x16, "instr_exec"},				\
	{(v), 0x17, "instr_exec_V_pipe"},			\
	{(v), 0x18, "clks_bus_cycle"},				\
	{(v), 0x19, "clks_full_wbufs"},				\
	{(v), 0x1a, "pipe_stall_read"},				\
	{(v), 0x1b, "stall_on_write_ME"},			\
	{(v), 0x1c, "locked_bus_cycle"},			\
	{(v), 0x1d, "io_rw_cycles"},				\
	{(v), 0x1e, "reads_noncache_mem"},			\
	{(v), 0x1f, "pipeline_agi_stalls"},			\
	{(v), 0x22, "flops"},					\
	{(v), 0x23, "bp_match_dr0"},				\
	{(v), 0x24, "bp_match_dr1"},				\
	{(v), 0x25, "bp_match_dr2"},				\
	{(v), 0x26, "bp_match_dr3"},				\
	{(v), 0x27, "hw_intrs"},				\
	{(v), 0x28, "data_rw"},					\
	{(v), 0x29, "data_rw_miss"}
2N/A
2N/Astatic const struct nametable P5mmx_names0[] = {
2N/A P5_EVENTS(V_P5),
2N/A {V_P5mmx, 0x2a, "bus_ownership_latency"},
2N/A {V_P5mmx, 0x2b, "mmx_instr_upipe"},
2N/A {V_P5mmx, 0x2c, "cache_M_line_sharing"},
2N/A {V_P5mmx, 0x2d, "emms_instr"},
2N/A {V_P5mmx, 0x2e, "bus_util_processor"},
2N/A {V_P5mmx, 0x2f, "sat_mmx_instr"},
2N/A {V_P5mmx, 0x30, "clks_not_HLT"},
2N/A {V_P5mmx, 0x31, "mmx_data_read"},
2N/A {V_P5mmx, 0x32, "clks_fp_stall"},
2N/A {V_P5mmx, 0x33, "d1_starv_fifo_0"},
2N/A {V_P5mmx, 0x34, "mmx_data_write"},
2N/A {V_P5mmx, 0x35, "pipe_flush_wbp"},
2N/A {V_P5mmx, 0x36, "mmx_misalign_data_refs"},
2N/A {V_P5mmx, 0x37, "rets_pred_incorrect"},
2N/A {V_P5mmx, 0x38, "mmx_multiply_unit_interlock"},
2N/A {V_P5mmx, 0x39, "rets"},
2N/A {V_P5mmx, 0x3a, "btb_false_entries"},
2N/A {V_P5mmx, 0x3b, "clocks_stall_full_wb"},
2N/A {V_END}
2N/A};
2N/A
2N/Astatic const struct nametable P5mmx_names1[] = {
2N/A P5_EVENTS(V_P5),
2N/A {V_P5mmx, 0x2a, "bus_ownership_transfers"},
2N/A {V_P5mmx, 0x2b, "mmx_instr_vpipe"},
2N/A {V_P5mmx, 0x2c, "cache_lint_sharing"},
2N/A {V_P5mmx, 0x2d, "mmx_fp_transitions"},
2N/A {V_P5mmx, 0x2e, "writes_noncache_mem"},
2N/A {V_P5mmx, 0x2f, "sats_performed"},
2N/A {V_P5mmx, 0x30, "clks_dcache_tlb_miss"},
2N/A {V_P5mmx, 0x31, "mmx_data_read_miss"},
2N/A {V_P5mmx, 0x32, "taken_br"},
2N/A {V_P5mmx, 0x33, "d1_starv_fifo_1"},
2N/A {V_P5mmx, 0x34, "mmx_data_write_miss"},
2N/A {V_P5mmx, 0x35, "pipe_flush_wbp_wb"},
2N/A {V_P5mmx, 0x36, "mmx_pipe_stall_data_read"},
2N/A {V_P5mmx, 0x37, "rets_pred"},
2N/A {V_P5mmx, 0x38, "movd_movq_stall"},
2N/A {V_P5mmx, 0x39, "rsb_overflow"},
2N/A {V_P5mmx, 0x3a, "btb_mispred_nt"},
2N/A {V_P5mmx, 0x3b, "mmx_stall_write_ME"},
2N/A {V_END}
2N/A};
2N/A
2N/Astatic const struct nametable *P5mmx_names[2] = {
2N/A P5mmx_names0,
2N/A P5mmx_names1
2N/A};
2N/A
2N/A/*
2N/A * Pentium Pro and Pentium II events
2N/A */
2N/Astatic const struct nametable P6_names[] = {
2N/A /*
2N/A * Data cache unit
2N/A */
2N/A {V_P6, 0x43, "data_mem_refs"},
2N/A {V_P6, 0x45, "dcu_lines_in"},
2N/A {V_P6, 0x46, "dcu_m_lines_in"},
2N/A {V_P6, 0x47, "dcu_m_lines_out"},
2N/A {V_P6, 0x48, "dcu_miss_outstanding"},
2N/A
2N/A /*
2N/A * Instruction fetch unit
2N/A */
2N/A {V_P6, 0x80, "ifu_ifetch"},
2N/A {V_P6, 0x81, "ifu_ifetch_miss"},
2N/A {V_P6, 0x85, "itlb_miss"},
2N/A {V_P6, 0x86, "ifu_mem_stall"},
2N/A {V_P6, 0x87, "ild_stall"},
2N/A
2N/A /*
2N/A * L2 cache
2N/A */
2N/A {V_P6, 0x28, "l2_ifetch"},
2N/A {V_P6, 0x29, "l2_ld"},
2N/A {V_P6, 0x2a, "l2_st"},
2N/A {V_P6, 0x24, "l2_lines_in"},
2N/A {V_P6, 0x26, "l2_lines_out"},
2N/A {V_P6, 0x25, "l2_m_lines_inm"},
2N/A {V_P6, 0x27, "l2_m_lines_outm"},
2N/A {V_P6, 0x2e, "l2_rqsts"},
2N/A {V_P6, 0x21, "l2_ads"},
2N/A {V_P6, 0x22, "l2_dbus_busy"},
2N/A {V_P6, 0x23, "l2_dbus_busy_rd"},
2N/A
2N/A /*
2N/A * External bus logic
2N/A */
2N/A {V_P6, 0x62, "bus_drdy_clocks"},
2N/A {V_P6, 0x63, "bus_lock_clocks"},
2N/A {V_P6, 0x60, "bus_req_outstanding"},
2N/A {V_P6, 0x65, "bus_tran_brd"},
2N/A {V_P6, 0x66, "bus_tran_rfo"},
2N/A {V_P6, 0x67, "bus_trans_wb"},
2N/A {V_P6, 0x68, "bus_tran_ifetch"},
2N/A {V_P6, 0x69, "bus_tran_inval"},
2N/A {V_P6, 0x6a, "bus_tran_pwr"},
2N/A {V_P6, 0x6b, "bus_trans_p"},
2N/A {V_P6, 0x6c, "bus_trans_io"},
2N/A {V_P6, 0x6d, "bus_tran_def"},
2N/A {V_P6, 0x6e, "bus_tran_burst"},
2N/A {V_P6, 0x70, "bus_tran_any"},
2N/A {V_P6, 0x6f, "bus_tran_mem"},
2N/A {V_P6, 0x64, "bus_data_rcv"},
2N/A {V_P6, 0x61, "bus_bnr_drv"},
2N/A {V_P6, 0x7a, "bus_hit_drv"},
2N/A {V_P6, 0x7b, "bus_hitm_drv"},
2N/A {V_P6, 0x7e, "bus_snoop_stall"},
2N/A
2N/A /*
2N/A * Floating point unit
2N/A */
2N/A {V_P6, 0xc1, "flops"}, /* 0 only */
2N/A {V_P6, 0x10, "fp_comp_ops_exe"}, /* 0 only */
2N/A {V_P6, 0x11, "fp_assist"}, /* 1 only */
2N/A {V_P6, 0x12, "mul"}, /* 1 only */
2N/A {V_P6, 0x13, "div"}, /* 1 only */
2N/A {V_P6, 0x14, "cycles_div_busy"}, /* 0 only */
2N/A
2N/A /*
2N/A * Memory ordering
2N/A */
2N/A {V_P6, 0x3, "ld_blocks"},
2N/A {V_P6, 0x4, "sb_drains"},
2N/A {V_P6, 0x5, "misalign_mem_ref"},
2N/A
2N/A /*
2N/A * Instruction decoding and retirement
2N/A */
2N/A {V_P6, 0xc0, "inst_retired"},
2N/A {V_P6, 0xc2, "uops_retired"},
2N/A {V_P6, 0xd0, "inst_decoder"},
2N/A
2N/A /*
2N/A * Interrupts
2N/A */
2N/A {V_P6, 0xc8, "hw_int_rx"},
2N/A {V_P6, 0xc6, "cycles_int_masked"},
2N/A {V_P6, 0xc7, "cycles_int_pending_and_masked"},
2N/A
2N/A /*
2N/A * Branches
2N/A */
2N/A {V_P6, 0xc4, "br_inst_retired"},
2N/A {V_P6, 0xc5, "br_miss_pred_retired"},
2N/A {V_P6, 0xc9, "br_taken_retired"},
2N/A {V_P6, 0xca, "br_miss_pred_taken_ret"},
2N/A {V_P6, 0xe0, "br_inst_decoded"},
2N/A {V_P6, 0xe2, "btb_misses"},
2N/A {V_P6, 0xe4, "br_bogus"},
2N/A {V_P6, 0xe6, "baclears"},
2N/A
2N/A /*
2N/A * Stalls
2N/A */
2N/A {V_P6, 0xa2, "resource_stalls"},
2N/A {V_P6, 0xd2, "partial_rat_stalls"},
2N/A
2N/A /*
2N/A * Segment register loads
2N/A */
2N/A {V_P6, 0x6, "segment_reg_loads"},
2N/A
2N/A /*
2N/A * Clocks
2N/A */
2N/A {V_P6, 0x79, "cpu_clk_unhalted"},
2N/A
2N/A /*
2N/A * MMX
2N/A */
2N/A {V_P6mmx, 0xb0, "mmx_instr_exec"},
2N/A {V_P6mmx, 0xb1, "mmx_sat_instr_exec"},
2N/A {V_P6mmx, 0xb2, "mmx_uops_exec"},
2N/A {V_P6mmx, 0xb3, "mmx_instr_type_exec"},
2N/A {V_P6mmx, 0xcc, "fp_mmx_trans"},
2N/A {V_P6mmx, 0xcd, "mmx_assists"},
2N/A {V_P6mmx, 0xce, "mmx_instr_ret"},
2N/A {V_P6mmx, 0xd4, "seg_rename_stalls"},
2N/A {V_P6mmx, 0xd5, "seg_reg_renames"},
2N/A {V_P6mmx, 0xd6, "ret_seg_renames"},
2N/A
2N/A {V_END}
2N/A};
2N/A
/*
 * Capability flags for a cpu version.  The index is not range-checked
 * here; callers check the version with validargs() first.
 */
#define	MAPCPUVER(cpuver)	(cpuvermap[(cpuver) - CPC_PENTIUM])
2N/A
2N/Astatic int
2N/Avalidargs(int cpuver, int regno)
2N/A{
2N/A if (regno < 0 || regno > 1)
2N/A return (0);
2N/A cpuver -= CPC_PENTIUM;
2N/A if (cpuver < 0 ||
2N/A cpuver >= sizeof (cpuvermap) / sizeof (cpuvermap[0]))
2N/A return (0);
2N/A return (1);
2N/A}
2N/A
2N/A/*ARGSUSED*/
2N/Astatic int
2N/Aversionmatch(int cpuver, int regno, const struct nametable *n)
2N/A{
2N/A if (!validargs(cpuver, regno) || (n->ver & MAPCPUVER(cpuver)) == 0)
2N/A return (0);
2N/A
2N/A switch (MAPCPUVER(cpuver)) {
2N/A case V_P5:
2N/A case V_P5 | V_P5mmx:
2N/A break;
2N/A case V_P6:
2N/A case V_P6 | V_P6mmx:
2N/A switch (n->bits) {
2N/A case 0xc1: /* flops */
2N/A case 0x10: /* fp_comp_ops_exe */
2N/A case 0x14: /* cycles_div_busy */
2N/A /* only reg0 counts these */
2N/A if (regno == 1)
2N/A return (0);
2N/A break;
2N/A case 0x11: /* fp_assist */
2N/A case 0x12: /* mul */
2N/A case 0x13: /* div */
2N/A /* only 1 can count these */
2N/A if (regno == 0)
2N/A return (0);
2N/A break;
2N/A default:
2N/A break;
2N/A }
2N/A break;
2N/A default:
2N/A return (0);
2N/A }
2N/A
2N/A return (1);
2N/A}
2N/A
2N/Astatic const struct nametable *
2N/Agetnametable(int cpuver, int regno)
2N/A{
2N/A const struct nametable *n;
2N/A
2N/A if (!validargs(cpuver, regno))
2N/A return (NULL);
2N/A
2N/A switch (MAPCPUVER(cpuver)) {
2N/A case V_P5:
2N/A case V_P5 | V_P5mmx:
2N/A n = P5mmx_names[regno];
2N/A break;
2N/A case V_P6:
2N/A case V_P6 | V_P6mmx:
2N/A n = P6_names;
2N/A break;
2N/A default:
2N/A n = NULL;
2N/A break;
2N/A }
2N/A
2N/A return (n);
2N/A}
2N/A
2N/Avoid
2N/Acpc_walk_names(int cpuver, int regno, void *arg,
2N/A void (*action)(void *, int, const char *, uint8_t))
2N/A{
2N/A const struct nametable *n;
2N/A
2N/A if ((n = getnametable(cpuver, regno)) == NULL)
2N/A return;
2N/A for (; n->ver != V_END; n++)
2N/A if (versionmatch(cpuver, regno, n))
2N/A action(arg, regno, n->name, n->bits);
2N/A}
2N/A
2N/Aconst char *
2N/A__cpc_reg_to_name(int cpuver, int regno, uint8_t bits)
2N/A{
2N/A const struct nametable *n;
2N/A
2N/A if ((n = getnametable(cpuver, regno)) == NULL)
2N/A return (NULL);
2N/A for (; n->ver != V_END; n++)
2N/A if (bits == n->bits && versionmatch(cpuver, regno, n))
2N/A return (n->name);
2N/A return (NULL);
2N/A}
2N/A
2N/A/*
2N/A * Register names can be specified as strings or even as numbers
2N/A */
2N/Aint
2N/A__cpc_name_to_reg(int cpuver, int regno, const char *name, uint8_t *bits)
2N/A{
2N/A const struct nametable *n;
2N/A char *eptr = NULL;
2N/A long value;
2N/A
2N/A if ((n = getnametable(cpuver, regno)) == NULL || name == NULL)
2N/A return (-1);
2N/A for (; n->ver != V_END; n++)
2N/A if (strcmp(name, n->name) == 0 &&
2N/A versionmatch(cpuver, regno, n)) {
2N/A *bits = n->bits;
2N/A return (0);
2N/A }
2N/A
2N/A value = strtol(name, &eptr, 0);
2N/A if (name != eptr && value >= 0 && value <= UINT8_MAX) {
2N/A *bits = (uint8_t)value;
2N/A return (0);
2N/A }
2N/A
2N/A return (-1);
2N/A}
2N/A
2N/Aconst char *
2N/Acpc_getcciname(int cpuver)
2N/A{
2N/A if (validargs(cpuver, 0))
2N/A switch (MAPCPUVER(cpuver)) {
2N/A case V_P5:
2N/A return ("Pentium");
2N/A case V_P5 | V_P5mmx:
2N/A return ("Pentium with MMX");
2N/A case V_P6:
2N/A return ("Pentium Pro, Pentium II");
2N/A case V_P6 | V_P6mmx:
2N/A return ("Pentium Pro with MMX, Pentium II");
2N/A default:
2N/A break;
2N/A }
2N/A return (NULL);
2N/A}
2N/A
2N/Aconst char *
2N/Acpc_getcpuref(int cpuver)
2N/A{
2N/A if (validargs(cpuver, 0))
2N/A switch (MAPCPUVER(cpuver)) {
2N/A case V_P5:
2N/A case V_P5 | V_P5mmx:
2N/A return (gettext(
2N/A "See Appendix A.2 of the \"Intel Architecture "
2N/A "Software Developer's Manual,\" 243192, 1997"));
2N/A case V_P6:
2N/A case V_P6 | V_P6mmx:
2N/A return (gettext(
2N/A "See Appendix A.1 of the \"Intel Architecture "
2N/A "Software Developer's Manual,\" 243192, 1997"));
2N/A default:
2N/A break;
2N/A }
2N/A return (NULL);
2N/A}
2N/A
2N/A/*
2N/A * This is a functional interface to allow CPUs with fewer %pic registers
2N/A * to share the same data structure as those with more %pic registers
2N/A * within the same instruction set family.
2N/A */
2N/Auint_t
2N/Acpc_getnpic(int cpuver)
2N/A{
2N/A switch (cpuver) {
2N/A case CPC_PENTIUM:
2N/A case CPC_PENTIUM_MMX:
2N/A case CPC_PENTIUM_PRO:
2N/A case CPC_PENTIUM_PRO_MMX:
2N/A#define EVENT ((cpc_event_t *)0)
2N/A return (sizeof (EVENT->ce_pic) / sizeof (EVENT->ce_pic[0]));
2N/A#undef EVENT
2N/A default:
2N/A return (0);
2N/A }
2N/A}
2N/A
/*
 * Extract bits u..l (inclusive, u >= l) of v, shifted down to bit 0.
 *
 * The mask is built from an unsigned 1 so that a 31-bit-wide field does
 * not shift into the sign bit.  The field width (1 + u - l) must be
 * smaller than the width of unsigned int.
 */
#define	BITS(v, u, l)	\
	(((v) >> (l)) & ((1u << (1 + (u) - (l))) - 1u))
2N/A
2N/A#include "getcpuid.h"
2N/A
2N/A/*
2N/A * Return the version of the current processor.
2N/A *
2N/A * Version -1 is defined as 'not performance counter capable'
2N/A */
2N/Aint
2N/Acpc_getcpuver(void)
2N/A{
2N/A static int ver = -1;
2N/A uint32_t maxeax;
2N/A uint32_t vbuf[4];
2N/A
2N/A if (ver != -1)
2N/A return (ver);
2N/A
2N/A maxeax = cpc_getcpuid(0, &vbuf[0], &vbuf[2], &vbuf[1]);
2N/A {
2N/A char *vendor = (char *)vbuf;
2N/A vendor[12] = '\0';
2N/A
2N/A if (strcmp(vendor, "GenuineIntel") != 0)
2N/A return (ver);
2N/A }
2N/A
2N/A if (maxeax >= 1) {
2N/A int family, model;
2N/A uint32_t eax, ebx, ecx, edx;
2N/A
2N/A eax = cpc_getcpuid(1, &ebx, &ecx, &edx);
2N/A
2N/A if ((family = BITS(eax, 11, 8)) == 0xf)
2N/A family = BITS(eax, 27, 20);
2N/A if ((model = BITS(eax, 7, 4)) == 0xf)
2N/A model = BITS(eax, 19, 16);
2N/A
2N/A /*
2N/A * map family and model into the performance
2N/A * counter architectures we currently understand.
2N/A *
2N/A * See application note AP485 (from developer.intel.com)
2N/A * for further explanation.
2N/A */
2N/A switch (family) {
2N/A case 5: /* Pentium and Pentium with MMX */
2N/A ver = model < 4 ?
2N/A CPC_PENTIUM : CPC_PENTIUM_MMX;
2N/A break;
2N/A case 6: /* Pentium Pro and Pentium II and III */
2N/A ver = BITS(edx, 23, 23) ? /* mmx check */
2N/A CPC_PENTIUM_PRO_MMX : CPC_PENTIUM_PRO;
2N/A break;
2N/A default:
2N/A case 0xf: /* Pentium IV */
2N/A break;
2N/A }
2N/A }
2N/A
2N/A return (ver);
2N/A}