/*
* Copyright 2009, Intel Corporation
* Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
*
* This file is part of PowerTOP
*
* This program file is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; version 2 of the License.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program in a file named COPYING; if not, write to the
* Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301 USA
*
* Authors:
* Arjan van de Ven <arjan@linux.intel.com>
* Eric C Saxe <eric.saxe@sun.com>
* Aubrey Li <aubrey.li@intel.com>
*/
/*
* GPL Disclaimer
*
* For the avoidance of doubt, except that if any license choice other
* than GPL or LGPL is available it will apply instead, Sun elects to
* use only the General Public License version 2 (GPLv2) at this time
* for any software where a choice of GPL license versions is made
* available with the language indicating that GPLv2 or any later
* version may be used, or where a choice of which version of the GPL
* is applied is otherwise unspecified.
*/
#include <stdarg.h>
#include <stdlib.h>
#include <libgen.h>
#include <unistd.h>
#include <strings.h>
#include <sys/systeminfo.h>
#include <kstat.h>
#include <time.h>
#include <errno.h>
#include "powertop.h"
/* Format used by pt_error() to prefix a diagnostic with the program name. */
static char PROG_FMT[] = "%s: ";
/* Format used by pt_error() to append the strerror() text to a diagnostic. */
static char ERR_FMT[] = ": %s";
/* Program name recorded by pt_set_progname(); NULL until that is called. */
static char *progname;
/*
 * Remember the last path component of 'name' (as computed by basename(3C))
 * in the file-scope 'progname' pointer.
 */
void
pt_set_progname(char *name)
{
	char *base;

	base = basename(name);
	progname = base;
}
/*
 * Print a printf-style diagnostic on stderr.
 *
 * Nothing is printed while g_gui is non-zero. Otherwise the message is
 * prefixed with the program name when one has been recorded, and if the
 * format string contains no newline the strerror() text for the errno value
 * seen on entry is appended.
 */
/*PRINTFLIKE1*/
void
pt_error(char *format, ...)
{
	va_list ap;
	/* Capture errno first: the stdio calls below may overwrite it */
	int saved_errno = errno;

	if (g_gui)
		return;

	if (progname != NULL)
		(void) fprintf(stderr, PROG_FMT, progname);

	va_start(ap, format);
	(void) vfprintf(stderr, format, ap);
	va_end(ap);

	/* A format carrying its own newline is considered complete */
	if (strchr(format, '\n') != NULL)
		return;

	(void) fprintf(stderr, ERR_FMT, strerror(saved_errno));
}
/*
* Returns the number of online CPUs.
*/
uint_t
pt_enumerate_cpus(void)
{
int cpuid;
int max, cpus_conf;
uint_t ncpus = 0;
max = sysconf(_SC_CPUID_MAX);
cpus_conf = sysconf(_SC_NPROCESSORS_CONF);
/* Fall back to one CPU if any of the sysconf calls above failed */
if (max == -1 || cpus_conf == -1) {
max = cpus_conf = 1;
}
/* Free any previous cpu table */
if (g_cpu_table != NULL)
free(g_cpu_table);
if ((g_cpu_table = malloc(cpus_conf * sizeof (processorid_t))) == NULL)
return (0);
for (cpuid = 0; cpuid < max; cpuid++) {
if (p_online(cpuid, P_STATUS) == P_ONLINE) {
g_cpu_table[ncpus] = cpuid;
ncpus++;
}
}
return (ncpus);
}
/*
* Simple integer comparison routine for the event report qsort(3C).
*/
int
pt_event_compare(const void *p1, const void *p2)
{
event_info_t *ip = ((event_info_t *)p1);
event_info_t *jp = ((event_info_t *)p2);
if (ip->total_count > jp->total_count)
return (-1);
if (ip->total_count < jp->total_count)
return (1);
return (0);
}
/*
 * Print the title/copyright banner followed by the command line usage
 * summary on stderr.
 */
void
pt_usage(void)
{
	/* Usage text, one entry per output line; none contains a '%' */
	static const char *const usage_lines[] = {
		"usage: powertop [option]\n",
		" -d, --dump [count] Read wakeups count "
		"times and print list of top offenders\n",
		" -t, --time [interval] Default time to gather "
		"data in seconds [1-30s]\n",
		" -v, --verbose Verbose mode, reports "
		"kernel cyclic activity\n",
		" -c, --cpu [CPU] Only observe a specific"
		" CPU\n",
		" -h, --help Show this help "
		"message\n"
	};
	size_t nlines = sizeof (usage_lines) / sizeof (usage_lines[0]);
	size_t line;

	(void) fprintf(stderr, "%s %s\n\n", TITLE, COPYRIGHT_INTEL);

	for (line = 0; line < nlines; line++)
		(void) fputs(usage_lines[line], stderr);
}
/*
 * Report the bit depth of the running system.
 *
 * Asks sysinfo() for the 64-bit architecture name and, if that fails, for
 * the 32-bit one, using a BIT_DEPTH_BUF-sized buffer. The name is then
 * matched against the known instruction set names.
 *
 * Returns 32 for "sparc"/"i386", 64 for "sparcv9"/"amd64", -2 if both
 * sysinfo() calls failed and -3 if the name is not recognised.
 */
int
pt_get_bit_depth(void)
{
	char isa[BIT_DEPTH_BUF];

	/* The 32-bit query is only attempted when the 64-bit one fails */
	if (sysinfo(SI_ARCHITECTURE_64, isa, BIT_DEPTH_BUF) == -1 &&
	    sysinfo(SI_ARCHITECTURE_32, isa, BIT_DEPTH_BUF) == -1)
		return (-2);

	if (strcmp(isa, "sparc") == 0)
		return (32);
	if (strcmp(isa, "i386") == 0)
		return (32);

	if (strcmp(isa, "sparcv9") == 0)
		return (64);
	if (strcmp(isa, "amd64") == 0)
		return (64);

	return (-3);
}
/*
 * Reset the global C-state/P-state accounting and recompute it from the
 * difference between the 'old' and 'new' snapshots.
 *
 * Does nothing when 'new' is NULL. In PT_MODE_CPU only the observed CPU is
 * accounted; in every other mode all CPUs are.
 */
void
pt_kstat_cpu_walk(struct snapshot *old, struct snapshot *new)
{
	int state;

	if (new == NULL)
		return;

	/* Clear the running totals */
	g_total_events = 0;
	g_total_p_time = 0;
	g_total_c_time = 0;

	/* Clear the per-state accumulators */
	for (state = 0; state < CPU_MAX_CSTATES; state++) {
		g_cstate_info[state].events = 0;
		g_cstate_info[state].last_time = 0;
		g_cstate_info[state].total_time = 0;
	}
	for (state = 0; state < CPU_MAX_PSTATES; state++)
		g_pstate_info[state].total_time = 0;

	if (g_op_mode == PT_MODE_CPU)
		pt_cpufreq_kstat_account_one(old, new);
	else
		pt_cpufreq_kstat_account_all(old, new);
}
/*
 * Account the kstat deltas of every CPU in the 'new' snapshot.
 *
 * 'old' may be NULL, in which case each CPU is passed to
 * pt_update_cpu_kstats() without a previous sample. When 'old' is present
 * but has no slot for a given index (for example because the number of
 * CPUs grew between the two snapshots) that CPU is skipped for this
 * interval rather than reading past the end of the old snapshot.
 */
void
pt_cpufreq_kstat_account_all(struct snapshot *old, struct snapshot *new)
{
	struct cpu_snapshot *oldcpu;
	int i;

	if (new == NULL || new->s_cpus == NULL)
		return;

	for (i = 0; i < new->s_num_cpus; i++) {
		oldcpu = NULL;
		if (old != NULL) {
			/* No matching previous sample for this CPU */
			if (old->s_cpus == NULL || i >= old->s_num_cpus)
				continue;
			oldcpu = &old->s_cpus[i];
		}
		pt_update_cpu_kstats(oldcpu, &new->s_cpus[i]);
	}
}
/*
 * Account the kstat deltas of the single CPU selected by g_observed_cpu.
 *
 * Nothing is done when the index is negative, is not below g_ncpus, or
 * does not fit in the 'new' snapshot. 'old' may be NULL; when it is present
 * but does not contain the observed CPU the update is skipped rather than
 * reading outside the old snapshot.
 */
void
pt_cpufreq_kstat_account_one(struct snapshot *old, struct snapshot *new)
{
	struct cpu_snapshot *oldcpu = NULL;
	int i = g_observed_cpu;

	if (new == NULL || new->s_cpus == NULL)
		return;
	if (i < 0 || i >= g_ncpus || i >= new->s_num_cpus)
		return;

	if (old != NULL) {
		if (old->s_cpus == NULL || i >= old->s_num_cpus)
			return;
		oldcpu = &old->s_cpus[i];
	}

	pt_update_cpu_kstats(oldcpu, &new->s_cpus[i]);
}
/*
 * Write the "previous sample" string for a delta computation into dst.
 *
 * With a previous sample the string is simply copied. Without one
 * (prev == NULL) an all-zero baseline "0:0:...:0" with 'entries' fields is
 * generated, so that the delta against it equals the current counters.
 * dst is always NUL-terminated when dstlen is non-zero.
 */
static void
pt_baseline_str(char *dst, size_t dstlen, const char *prev, int entries)
{
	size_t	off = 0;
	int	i;

	if (dstlen == 0)
		return;

	if (prev != NULL) {
		(void) snprintf(dst, dstlen, "%s", prev);
		return;
	}

	/* Each field adds at most two bytes; keep room for the terminator */
	for (i = 0; i < entries && off + 2 < dstlen; i++) {
		if (i > 0)
			dst[off++] = ':';
		dst[off++] = '0';
	}
	dst[off] = '\0';
}

/*
 * Fold one CPU's C-state and P-state kstat deltas into the global
 * accounting (g_cstate_info, g_pstate_info and the g_total_* counters).
 *
 * c1 is the previous sample and may be NULL (first output), in which case
 * the counters of c2 are accounted as-is, i.e. relative to zero.
 * c2 is the current sample; nothing is done when it is NULL, when either
 * sample is not CPU_ACTIVE, or when c2's CPU id is outside [0, g_ncpus).
 *
 * The process exits with status 2 if a kstat reports more states than
 * CPU_MAX_CSTATES / CPU_MAX_PSTATES. A malformed counter string ends the
 * update silently; anything accounted before that point is kept.
 */
void
pt_update_cpu_kstats(struct cpu_snapshot *c1, struct cpu_snapshot *c2)
{
	char		*s1, *s2;
	uint64_t	c_nsec[CPU_MAX_CSTATES];
	uint64_t	c_count[CPU_MAX_CSTATES];
	uint64_t	p_nsec[CPU_MAX_PSTATES];
	int		max_pstates, max_cstates;
	int		i, j;
	/*
	 * Scratch buffers hold one number of up to MAX_UINT64_STRLEN bytes
	 * per state; the P-state limit is used for both kinds of string.
	 */
	size_t		max_strlen = (size_t)MAX_UINT64_STRLEN * CPU_MAX_PSTATES;

	/* The current sample is mandatory and must have stats to report */
	if (c2 == NULL || !CPU_ACTIVE(c2))
		return;

	/* A previous sample is optional, but if given it must be usable */
	if (c1 != NULL && !CPU_ACTIVE(c1))
		return;

	if (c2->cs_id >= g_ncpus || c2->cs_id < 0)
		return;

	s1 = malloc(max_strlen);
	s2 = malloc(max_strlen);
	if (s1 == NULL || s2 == NULL)
		goto done;

	if (g_features & FEATURE_CSTATE) {
		max_cstates = (int)pt_kstat_long(&c2->cs_cpu_info,
		    "supported_max_cstates", &g_supported_max_cstates_index);
		g_max_cstates = ++max_cstates;

		if (max_cstates > CPU_MAX_CSTATES) {
			pt_error("max_cstates %d out of range\n",
			    max_cstates);
			exit(2);
		}

		for (i = 0; i < max_cstates; i++) {
			c_nsec[i] = 0;
			c_count[i] = 0;
		}

		/* Residency time per C-state */
		pt_baseline_str(s1, max_strlen,
		    (c1 != NULL) ? c1->cstates_nsec : NULL, max_cstates);
		(void) snprintf(s2, max_strlen, "%s", c2->cstates_nsec);
		if (pt_delta_str(s1, s2, c_nsec, max_cstates) != max_cstates)
			goto done;

		/* Entry count per C-state */
		pt_baseline_str(s1, max_strlen,
		    (c1 != NULL) ? c1->cstates_count : NULL, max_cstates);
		(void) snprintf(s2, max_strlen, "%s", c2->cstates_count);
		if (pt_delta_str(s1, s2, c_count, max_cstates) != max_cstates)
			goto done;

		/*
		 * C-state kstats are in C(n) ... C(0) order.
		 * PowerTOP displays them in C(0) to C(n) order.
		 */
		for (i = 0; i < max_cstates; i++) {
			j = max_cstates - i - 1;
			g_cstate_info[i].events += c_count[j];
			g_cstate_info[i].last_time = c_nsec[j];
			g_cstate_info[i].total_time += c_nsec[j];
			g_total_c_time += c_nsec[j];
		}

		/* g_total_events counts transitions into C0 */
		if (max_cstates > 0)
			g_total_events += c_count[max_cstates - 1];
	}

	if (g_features & FEATURE_PSTATE) {
		max_pstates = (int)pt_kstat_long(&c2->cs_cpu_info,
		    "supported_max_pstates",
		    &g_supported_max_pstates_index);
		g_max_pstates = ++max_pstates;

		if (max_pstates > CPU_MAX_PSTATES) {
			pt_error("max_pstates %d out of range\n",
			    max_pstates);
			exit(2);
		}

		for (i = 0; i < max_pstates; i++)
			p_nsec[i] = 0;

		pt_baseline_str(s1, max_strlen,
		    (c1 != NULL) ? c1->pstates_nsec : NULL, max_pstates);
		(void) snprintf(s2, max_strlen, "%s", c2->pstates_nsec);
		if (pt_delta_str(s1, s2, p_nsec, max_pstates) != max_pstates)
			goto done;

		/*
		 * P-state kstats are in P(n) .. P(0) order and PowerTOP
		 * displays them in this order, so no reversal is needed.
		 */
		for (i = 0; i < max_pstates; i++) {
			g_pstate_info[i].total_time += p_nsec[i];
			g_total_p_time += p_nsec[i];
		}
	}

done:
	/* Single exit path releases both scratch buffers; free(NULL) is OK */
	free(s1);
	free(s2);
}
/*
 * The snapshot has changed for whatever reason. We need to regenerate
 * all information pertaining to the cpus including the record offsets.
 *
 * Re-enumerates the CPUs (updating g_ncpus, and g_ncpus_observed unless a
 * single CPU is being observed), replaces
 * g_ss_state_info.kstat_cpu_information with a fresh table and looks up the
 * "cpu_info" kstat of every online CPU.
 *
 * Returns 0 on success. On failure returns an errno value, or -1 when a
 * single CPU is being observed and an offline CPU is encountered; in both
 * cases the table is released and the global pointer reset to NULL so that
 * a later acquisition regenerates it instead of using partial data.
 * Exits the process if no online CPU can be enumerated.
 *
 * NOTE(review): the loop uses the table index as the processor ID for
 * p_online() and kstat_lookup(), i.e. it presumably assumes CPU IDs are
 * contiguous from 0 - confirm against g_cpu_table usage elsewhere.
 */
int
pt_generate_cpu_information(kstat_ctl_t *kc)
{
	struct kstat_cpu_records *recs;
	int	max_cpus;
	int	err;
	int	i;

	/*
	 * Re-enumerate the system's CPUs, populate cpu_table, g_ncpus
	 */
	if ((g_ncpus = pt_enumerate_cpus()) == 0)
		exit(EXIT_FAILURE);

	if (!PT_ON_CPU)
		g_ncpus_observed = g_ncpus;

	max_cpus = g_ncpus;

	/* Drop the previous table; free(NULL) is a no-op */
	free(g_ss_state_info.kstat_cpu_information);
	recs = calloc(max_cpus, sizeof (struct kstat_cpu_records));
	g_ss_state_info.kstat_cpu_information = recs;
	if (recs == NULL)
		return (errno);

	/*
	 * Walk the cpu snapshot chain, looking for the information we desire.
	 */
	g_ss_state_info.kstat_cpus_active = 0;

	for (i = 0; i < max_cpus; i++) {
		recs[i].cs_state = p_online(i, P_STATUS);

		/* If no valid CPU is present, move on to the next one */
		if (!(CPU_ONLINE(recs[i].cs_state))) {
			recs[i].cs_id = ID_NO_CPU;
			if (PT_ON_CPU) {
				err = -1;
				goto fail;
			}
			continue;
		}

		recs[i].cs_id = i;
		g_ss_state_info.kstat_cpus_active++;

		recs[i].cpu_info_ksp_ptr =
		    kstat_lookup(kc, "cpu_info", i, NULL);
		if (recs[i].cpu_info_ksp_ptr == NULL) {
			/* Never report success with a missing kstat */
			err = (errno != 0) ? errno : ENOENT;
			goto fail;
		}
	}

	return (0);

fail:
	free(recs);
	g_ss_state_info.kstat_cpu_information = NULL;
	g_ss_state_info.kstat_cpus_active = 0;
	return (err);
}
/*
* Note: the following helpers do not clean up on the failure case,
* because pt_free_snapshot() is called in main each time through the loop
*/
int
pt_acquire_cpus(struct snapshot *ss, kstat_ctl_t *kc)
{
kstat_t *ksp;
kstat_named_t *knp;
size_t i;
struct kstat_cpu_records *kstat_cpu_information = NULL;
int rtc, len;
ss->s_num_cpus = g_ncpus;
if ((ss->s_cpus = calloc(ss->s_num_cpus,
sizeof (struct cpu_snapshot))) == NULL)
return (errno);
/*
* If the snap has changed or we have not obtained the
* kstat_cpu_information, then go and get the cpu information.
*/
if ((ss->snap_changed || !g_ss_state_info.kstat_cpu_information) &&
((rtc = pt_generate_cpu_information(kc)) != 0))
return (rtc);
kstat_cpu_information = g_ss_state_info.kstat_cpu_information;
/*
* Read the data in that changes between the intervals.
*/
g_ss_state_info.kstat_cpus_active = 0;
for (i = 0; i < ss->s_num_cpus; i++) {
ss->s_cpus[i].cs_id = kstat_cpu_information[i].cs_id;
ss->s_cpus[i].cs_state = kstat_cpu_information[i].cs_state;
/* If no valid CPU is present, move on to the next one */
if (ss->s_cpus[i].cs_state == ID_NO_CPU)
continue;
if (!CPU_ACTIVE(&ss->s_cpus[i]))
continue;
g_ss_state_info.kstat_cpus_active++;
ksp = kstat_cpu_information[i].cpu_info_ksp_ptr;
if (kstat_read(kc, ksp, NULL) == -1)
return (errno);
if (pt_kstat_copy(ksp, &ss->s_cpus[i].cs_cpu_info))
return (errno);
/* now copy the cstate and pstate strings */
if (g_features & FEATURE_CSTATE) {
knp = pt_kstat_data_lookup(ksp, "cstates_nsec",
&g_cstates_nsec_index);
if (knp == NULL)
return (errno);
if (knp->value.str.addr.ptr == NULL)
return (ENOENT);
len = strlen(knp->value.str.addr.ptr);
(void) memcpy(&ss->s_cpus[i].cstates_nsec,
knp->value.str.addr.ptr, len);
knp = pt_kstat_data_lookup(ksp, "cstates_count",
&g_cstates_count_index);
if (knp == NULL)
return (errno);
if (knp->value.str.addr.ptr == NULL)
return (ENOENT);
len = strlen(knp->value.str.addr.ptr);
(void) memcpy(&ss->s_cpus[i].cstates_count,
knp->value.str.addr.ptr, len);
}
if (g_features & FEATURE_PSTATE) {
knp = pt_kstat_data_lookup(ksp, "pstates_nsec",
&g_pstates_nsec_index);
if (knp == NULL)
return (errno);
if (knp->value.str.addr.ptr == NULL)
return (ENOENT);
len = strlen(knp->value.str.addr.ptr);
(void) memcpy(&ss->s_cpus[i].pstates_nsec,
knp->value.str.addr.ptr, len);
}
}
errno = 0;
return (errno);
}
/*
* Note: the following helpers do not clean up on the failure case,
* because pt_free_snapshot() is called in main each time through the loop
*/
int
pt_acquire_cpu(struct snapshot *ss, kstat_ctl_t *kc)
{
kstat_t *ksp;
kstat_named_t *knp;
struct kstat_cpu_records *kstat_cpu_information = NULL;
int rtc;
int i = g_observed_cpu;
if (i < 0 || i >= g_ncpus) {
errno = EINVAL;
return (errno);
}
ss->s_num_cpus = g_ncpus;
if ((ss->s_cpus = calloc(ss->s_num_cpus,
sizeof (struct cpu_snapshot))) == NULL)
return (errno);
/*
* If the snap has changed or we have not obtained the
* kstat_cpu_information, then go and get the cpu information.
*/
if (ss->snap_changed || !g_ss_state_info.kstat_cpu_information)
if ((rtc = pt_generate_cpu_information(kc)) != 0)
return (rtc);
kstat_cpu_information = g_ss_state_info.kstat_cpu_information;
/*
* Read the data in that changes between the intervals.
*/
ss->s_cpus[i].cs_id = kstat_cpu_information[i].cs_id;
ss->s_cpus[i].cs_state = kstat_cpu_information[i].cs_state;
ksp = kstat_cpu_information[i].cpu_info_ksp_ptr;
if (kstat_read(kc, ksp, NULL) == -1)
return (errno);
if (pt_kstat_copy(ksp, &ss->s_cpus[i].cs_cpu_info))
return (errno);
/*
* now copy the cstate and pstate strings
*/
if (g_features & FEATURE_CSTATE) {
knp = pt_kstat_data_lookup(ksp, "cstates_nsec",
&g_cstates_nsec_index);
if (knp == NULL)
return (errno);
if (knp->value.str.addr.ptr == NULL)
return (ENOENT);
(void) memcpy(&ss->s_cpus[i].cstates_nsec,
knp->value.str.addr.ptr, strlen(knp->value.str.addr.ptr));
knp = pt_kstat_data_lookup(ksp, "cstates_count",
&g_cstates_count_index);
if (knp == NULL)
return (errno);
if (knp->value.str.addr.ptr == NULL)
return (ENOENT);
(void) memcpy(&ss->s_cpus[i].cstates_count,
knp->value.str.addr.ptr, strlen(knp->value.str.addr.ptr));
}
if (g_features & FEATURE_PSTATE) {
knp = pt_kstat_data_lookup(ksp, "pstates_nsec",
&g_pstates_nsec_index);
if (knp == NULL)
return (errno);
if (knp->value.str.addr.ptr == NULL)
return (ENOENT);
(void) memcpy(&ss->s_cpus[i].pstates_nsec,
knp->value.str.addr.ptr, strlen(knp->value.str.addr.ptr));
}
errno = 0;
return (errno);
}
/*
 * Allocate a snapshot and fill it with the current CPU kstat data.
 *
 * The kstat chain is brought up to date first. If that keeps failing for
 * more than PT_KSTAT_CHAIN_RETRYS attempts the kstat handle is closed and
 * reopened; the new handle is stored in g_kc and handed back through
 * *kc_passed so the caller does not keep using the closed one.
 *
 * Returns the snapshot, or NULL if the CPU data could not be acquired (the
 * partially filled snapshot is released and errno is preserved). Exits the
 * process if no memory or no kstat handle is available.
 */
struct snapshot *
pt_acquire_kstat_snapshot(kstat_ctl_t **kc_passed)
{
	struct snapshot	*ss = NULL;
	kstat_ctl_t	*kc = *kc_passed;
	int		err = 0;
	int		saved_errno;
	int		retry_cnt = 0;

	ss = pt_safe_alloc(sizeof (struct snapshot));
	if (ss == NULL) {
		pt_error("acquiring snapshot failed, no memory");
		exit(2);
	}

	(void) memset(ss, 0, sizeof (struct snapshot));

	while ((ss->snap_changed = kstat_chain_update(kc)) == -1) {
		/* Keep the update's errno; the ioctl below may replace it */
		saved_errno = errno;

		kc->kc_chain_id = (kid_t)ioctl(kc->kc_kd,
		    KSTAT_IOC_CHAIN_ID, NULL);

		if (saved_errno == EAGAIN)
			(void) nanosleep(&g_rqtp, NULL);

		retry_cnt++;
		if (retry_cnt > PT_KSTAT_CHAIN_RETRYS)
			break;
	}

	if (retry_cnt > PT_KSTAT_CHAIN_RETRYS) {
		/* Give up on this handle and start over with a new one */
		(void) kstat_close(kc);
		kc = kstat_open();
		g_kc = kc;
		*kc_passed = kc;
	}

	if (kc == NULL) {
		pt_error("kstat_chain_update failed");
		exit(2);
	}

	if (PT_ON_CPU)
		err = pt_acquire_cpu(ss, kc);
	else
		err = pt_acquire_cpus(ss, kc);

	if (err != 0) {
		/*
		 * The caller only sees NULL and cannot release the
		 * snapshot, so it has to be done here.
		 */
		saved_errno = errno;
		pt_free_snapshot(ss);
		errno = saved_errno;
		return (NULL);
	}

	return (ss);
}
/*
 * Release a snapshot: the kstat data copied for each CPU, the per-CPU
 * array and finally the snapshot itself. A NULL snapshot is ignored.
 */
void
pt_free_snapshot(struct snapshot *ss)
{
	size_t cpu;

	if (ss == NULL)
		return;

	if (ss->s_cpus != NULL) {
		cpu = 0;
		while (cpu < ss->s_num_cpus) {
			SAFE_FREE(ss->s_cpus[cpu].cs_cpu_info.ks_data,
			    sizeof (ss->s_cpus[cpu].cs_cpu_info.ks_data));
			cpu++;
		}
		SAFE_FREE(ss->s_cpus, sizeof (struct cpu_snapshot *));
	}

	SAFE_FREE(ss, sizeof (struct snapshot));
}
/*
 * Open a kstat handle, retrying after a g_rqtp pause for as long as
 * kstat_open() fails with EAGAIN. Any other failure is reported and the
 * process exits with status 2, so the returned handle is never NULL.
 */
kstat_ctl_t *
pt_open_kstat(void)
{
	kstat_ctl_t *handle;

	for (;;) {
		handle = kstat_open();
		if (handle != NULL)
			return (handle);

		if (errno != EAGAIN) {
			pt_error("kstat_open failed");
			exit(2);
		}

		(void) nanosleep(&g_rqtp, NULL);
	}
}
/*
 * Allocate 'size' bytes with malloc(), retrying after a g_rqtp pause for as
 * long as the allocation fails with EAGAIN. Any other failure is reported
 * and the process exits with status 2, so the result is never NULL.
 */
void *
pt_safe_alloc(size_t size)
{
	void *mem;

	for (;;) {
		mem = malloc(size);
		if (mem != NULL)
			return (mem);

		if (errno != EAGAIN) {
			pt_error("malloc failed");
			exit(2);
		}

		(void) nanosleep(&g_rqtp, NULL);
	}
}
/*
 * Convert a ':'-separated, NUL-terminated string of decimal numbers into
 * an array of values.
 *
 * s        input string; it is modified (separators become NULs)
 * values   output array with room for 'entries' elements
 * entries  number of fields expected; must be at least two
 *
 * Fields are always parsed as base 10, so zero-padded numbers are not
 * mistaken for octal. An empty field yields 0. Anything following the
 * last expected field is ignored.
 *
 * Returns 'entries' on success, or 0 for a malformed string (NULL or empty
 * input, fewer than two entries requested, or too few separators).
 */
int
pt_delta_str_helper(char *s, uint64_t *values, int entries)
{
	char	*sep, *field = s;
	int	i;

	if (s == NULL || values == NULL || entries <= 1 || *s == '\0')
		return (0);

	/* All fields but the last must be followed by a ':' */
	for (i = 0; i < entries - 1; i++) {
		sep = strchr(field, ':');
		if (sep == NULL)
			return (0);

		*sep = '\0';
		values[i] = strtoull(field, NULL, 10);
		field = sep + 1;
	}

	/* The last field runs to the end of the string */
	values[i] = strtoull(field, NULL, 10);

	return (entries);
}
/*
 * Compute the per-position difference between two ':'-separated strings of
 * uint64 numbers: delta[i] = (i-th number of s2) - (i-th number of s1).
 *
 * Both strings are parsed with pt_delta_str_helper() and are therefore
 * modified. At most CPU_MAX_PSTATES entries are supported.
 *
 * Assumptions made by the original author about the callers:
 *   - both strings have the same number of entries
 *   - an entry may be empty (consecutive ':'), and is then empty in both
 *     strings; the difference of two empty entries is 0
 *   - the numbers are nanoseconds and will not wrap
 *
 * Returns 'entries' on success, or 0 if 'entries' is too large or either
 * string is malformed; 'delta' is left untouched in that case.
 */
int
pt_delta_str(char *s1, char *s2, uint64_t *delta, int entries)
{
	uint64_t before[CPU_MAX_PSTATES];
	uint64_t after[CPU_MAX_PSTATES];
	int found_before, found_after;
	int idx;

	if (entries > CPU_MAX_PSTATES)
		return (0);

	found_before = pt_delta_str_helper(s1, before, entries);
	if (found_before == 0)
		return (0);

	found_after = pt_delta_str_helper(s2, after, entries);
	if (found_before != entries || found_after != found_before)
		return (0);

	idx = 0;
	while (idx < entries) {
		delta[idx] = after[idx] - before[idx];
		idx++;
	}

	return (entries);
}
/*
 * Return the long value of the named entry of 'ksp', located through
 * pt_kstat_data_lookup() (which may use or update *index), or 0 if the
 * entry cannot be found.
 */
long
pt_kstat_long(kstat_t *ksp, char *name, int *index)
{
	kstat_named_t *entry = pt_kstat_data_lookup(ksp, name, index);

	return ((entry == NULL) ? 0 : entry->value.l);
}
/*
 * Copy the kstat 'src' into 'dst', giving 'dst' its own malloc'ed copy of
 * the data area (or a NULL data pointer and a size of 0 when 'src' has no
 * data).
 *
 * Returns 0 on success, -1 if the data copy could not be allocated; in
 * that case dst->ks_data is NULL and the other fields are those of 'src'.
 */
int
pt_kstat_copy(const kstat_t *src, kstat_t *dst)
{
	/* Start from a shallow copy, then replace the data pointer */
	*dst = *src;

	if (src->ks_data == NULL) {
		dst->ks_data = NULL;
		dst->ks_data_size = 0;
		return (0);
	}

	dst->ks_data = malloc(src->ks_data_size);
	if (dst->ks_data == NULL)
		return (-1);

	bcopy(src->ks_data, dst->ks_data, src->ks_data_size);
	dst->ks_data_size = src->ks_data_size;

	return (0);
}
/*
 * Find the entry called 'name' in the data area of a named or timer kstat.
 *
 * 'index_ptr' is an optional cache of the entry's position: if it is
 * non-NULL and holds a value > -1, that position is tried first. The cached
 * position is only trusted when it lies inside the kstat's data and the
 * entry found there really carries 'name'; otherwise the array is walked
 * and the cache is refreshed with the position found. This keeps a cache
 * that has gone stale (or that was filled from a kstat with a different
 * layout) from returning the wrong entry or reading out of bounds.
 *
 * Returns a pointer to the entry, or NULL with errno set to EINVAL (NULL
 * ksp or name, unsupported kstat type) or ENOENT (no data or no such
 * entry).
 */
void *
pt_kstat_data_lookup(kstat_t *ksp, char *name, int *index_ptr)
{
	int	i;
	int	size;
	int	index;
	char	*namep, *datap;

	if (ksp == NULL || name == NULL) {
		errno = EINVAL;
		return (NULL);
	}

	switch (ksp->ks_type) {
	case KSTAT_TYPE_NAMED:
		size = sizeof (kstat_named_t);
		break;
	case KSTAT_TYPE_TIMER:
		size = sizeof (kstat_timer_t);
		break;
	default:
		errno = EINVAL;
		return (NULL);
	}

	if (ksp->ks_data == NULL) {
		errno = ENOENT;
		return (NULL);
	}

	/* Only form the name pointer once the data area is known to exist */
	if (ksp->ks_type == KSTAT_TYPE_NAMED)
		namep = KSTAT_NAMED_PTR(ksp)->name;
	else
		namep = KSTAT_TIMER_PTR(ksp)->name;
	datap = ksp->ks_data;

	index = (index_ptr != NULL) ? *index_ptr : -1;
	if (index >= 0 && (unsigned int)index < ksp->ks_ndata &&
	    strcmp(name, namep + (size_t)size * index) == 0) {
		/* Short cut to the information. */
		return (datap + (size_t)size * index);
	}

	/* Need to go find the string. */
	for (i = 0; i < ksp->ks_ndata; i++) {
		if (strcmp(name, namep) == 0) {
			if (index_ptr != NULL)
				*index_ptr = i;
			return (datap);
		}
		namep += size;
		datap += size;
	}

	errno = ENOENT;
	return (NULL);
}