/*
* Copyright 2009, Intel Corporation
* Copyright 2009, Sun Microsystems, Inc
*
* This file is part of PowerTOP
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; version 2 of the License.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program in a file named COPYING; if not, write to the
* Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301 USA
*
* Authors:
* Arjan van de Ven <arjan@linux.intel.com>
* Eric C Saxe <eric.saxe@sun.com>
* Aubrey Li <aubrey.li@intel.com>
*/
/*
* GPL Disclaimer
*
* For the avoidance of doubt, except that if any license choice other
* than GPL or LGPL is available it will apply instead, Sun elects to
* use only the General Public License version 2 (GPLv2) at this time
* for any software where a choice of GPL license versions is made
* available with the language indicating that GPLv2 or any later
* version may be used, or where a choice of which version of the GPL
* is applied is otherwise unspecified.
*/
#include <stdlib.h>
#include <string.h>
#include <dtrace.h>
#include <kstat.h>
#include <errno.h>
#include "powertop.h"
/*
 * NOTE(review): judging by the dtp_ prefix this is presumably the argument
 * vector handed to the DTrace compiler, i.e. the source of the $0/$1 macro
 * arguments referenced by the D programs in this file. The code that
 * allocates and fills it is not visible here -- confirm.
 */
static char **dtp_argv;
/*
* Enabling PM through /etc/power.conf
* See pt_cpufreq_suggest()
*/
/*
 * D program that tracks CPU frequency transitions on every CPU.
 *
 * The adjacent string literals are concatenated by the C compiler with no
 * separators, so the resulting program text contains no newlines.
 *
 * NOTE(review): last[] is sized by macro argument $0, presumably the number
 * of CPUs; the call that compiles this program and supplies the macro
 * arguments is not visible here -- confirm.
 */
static const char *dtp_cpufreq =
/* Per-CPU timestamp of the most recent cpu-change-speed firing */
"hrtime_t last[$0];"
""
/* Remember when tracing started */
"BEGIN"
"{"
" begin = timestamp;"
"}"
""
/*
 * This CPU (arg0) has fired before: add the time elapsed since its previous
 * firing to the aggregation keyed by [cpu, oldspeed (arg1)], then record
 * the new timestamp.
 */
":::cpu-change-speed"
"/last[(processorid_t)arg0] != 0/"
"{"
" this->cpu = (processorid_t)arg0;"
" this->oldspeed = (uint64_t)arg1;"
" @times[this->cpu, this->oldspeed] = sum(timestamp - last[this->cpu]);"
" last[this->cpu] = timestamp;"
"}"
/*
 * First firing for this CPU: last[] holds nothing for it yet, so the
 * elapsed time is measured from 'begin' instead.
 */
":::cpu-change-speed"
"/last[(processorid_t)arg0] == 0/"
"{"
" this->cpu = (processorid_t)arg0;"
" this->oldspeed = (uint64_t)arg1;"
" @times[this->cpu, this->oldspeed] = sum(timestamp - begin);"
" last[this->cpu] = timestamp;"
"}";
/*
 * Same as dtp_cpufreq, but only for a specific CPU.
 *
 * Both probe clauses carry the extra predicate "(processorid_t)arg0 == $1",
 * so firings for any other CPU are ignored, and 'last' is a single scalar
 * rather than a per-CPU array.
 *
 * NOTE(review): $1 is presumably the id of the CPU selected by the user;
 * the call that supplies the macro arguments is not visible here -- confirm.
 */
static const char *dtp_cpufreq_c =
/* Timestamp of the most recent firing on the selected CPU */
"hrtime_t last;"
""
/* Remember when tracing started */
"BEGIN"
"{"
" begin = timestamp;"
"}"
""
/*
 * The selected CPU has fired before: add the time elapsed since the previous
 * firing to the aggregation keyed by [cpu, oldspeed (arg1)].
 */
":::cpu-change-speed"
"/(processorid_t)arg0 == $1 &&"
" last != 0/"
"{"
" this->cpu = (processorid_t)arg0;"
" this->oldspeed = (uint64_t)arg1;"
" @times[this->cpu, this->oldspeed] = sum(timestamp - last);"
" last = timestamp;"
"}"
/* First firing on the selected CPU: measure from 'begin' instead */
":::cpu-change-speed"
"/(processorid_t)arg0 == $1 &&"
" last == 0/"
"{"
" this->cpu = (processorid_t)arg0;"
" this->oldspeed = (uint64_t)arg1;"
" @times[this->cpu, this->oldspeed] = sum(timestamp - begin);"
" last = timestamp;"
"}";
/*
 * Forward declarations of the file-local (static) helpers used by this
 * module.
 */
static int pt_cpufreq_setup(void);
static int pt_cpufreq_snapshot(void);
static int pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *, void *);
static void pt_cpufreq_stat_account(double, uint_t);
static int pt_cpufreq_check_pm(void);
static void pt_cpufreq_enable(void);
/*
 * Setup helper called by pt_cpufreq_stat_prepare().
 *
 * Returns 0 on success and 1 on failure; the caller reports any non-zero
 * value as a memory allocation failure.
 *
 * NOTE(review): this body is incomplete as it stands -- the conditions that
 * guard the early returns are missing and the braces do not balance.
 * Restore the missing lines from version control before building or
 * modifying this function.
 */
static int
pt_cpufreq_setup(void)
{
return (1);
return (1);
}
/* Extra work that only applies when PT_ON_CPU is set */
if (PT_ON_CPU) {
== NULL) {
return (1);
}
}
return (0);
}
/*
 * Perform setup necessary to enumerate and track CPU speed changes
 *
 * Return values visible in this body:
 *    0                 success
 *   -1                 an allocation failed, or fewer than two P-states
 *                      were found (speed transitions not supported)
 *   -2                 the DTrace library could not be opened
 *   errno              pt_cpufreq_setup() or a kstat step failed
 *   dtrace_errno(dtp)  a later DTrace step failed
 *
 * NOTE(review): this body is incomplete as it stands -- several statements
 * are cut off mid-expression and the braces do not balance. Restore the
 * missing lines from version control before building or modifying this
 * function.
 */
int
pt_cpufreq_stat_prepare(void)
{
int err;
if ((err = pt_cpufreq_setup()) != 0) {
pt_error("failed to setup %s report (couldn't allocate "
"memory)\n", g_msg_freq_state);
/*
 * NOTE(review): pt_cpufreq_setup() only reports 0/1, and errno is read
 * after pt_error() has run, so the value returned here may not describe
 * the actual failure -- confirm callers only test for non-zero.
 */
return (errno);
}
sizeof (cpu_power_info_t))) == NULL)
return (-1);
/*
* Enumerate the CPU frequencies
*/
return (errno);
(void) kstat_close(kc);
return (err);
}
/* Count the P-states found */
g_npstates = 0;
g_npstates++;
state++;
}
pt_error("CPU exceeds the supported number of %s\n",
(void) kstat_close(kc);
/*
* Return if speed transition is not supported
*/
if (g_npstates < 2)
return (-1);
/*
* Setup DTrace to look for CPU frequency changes
*/
pt_error("cannot open dtrace library for the %s report: %s\n",
return (-2);
}
/*
* Execute different scripts (defined above) depending on
* user specified options. Default mode uses dtp_cpufreq.
*/
if (PT_ON_CPU)
prog_ptr = (char *)dtp_cpufreq_c;
else
prog_ptr = (char *)dtp_cpufreq;
/* Each remaining DTrace step bails out with the library's error code */
return (dtrace_errno(dtp));
}
return (dtrace_errno(dtp));
}
return (dtrace_errno(dtp));
}
return (dtrace_errno(dtp));
}
return (0);
}
/*
 * The DTrace probes have already been enabled, and are tracking
 * CPU speed transitions. Take a snapshot of the aggregations, and
 * look for any CPUs that have made a speed transition over the last
 * sampling interval. Note that the aggregations may be empty if no
 * speed transitions took place over the last interval. In that case,
 * notate that we have already accounted for the time, so that when
 * we do encounter a speed transition in a future sampling interval
 * we can subtract that time back out.
 *
 * Return values visible in this body: 0 on success, -1 on an early
 * failure, or the non-zero result of pt_cpufreq_snapshot().
 *
 * NOTE(review): the line carrying this function's name and parameter list
 * is missing, and the body is incomplete (dangling conditions, empty switch
 * arms). Restore the missing lines from version control before building or
 * modifying this function.
 */
int
{
int i, ret;
/*
* Zero out the interval time reported by DTrace for
* this interval
*/
for (i = 0; i < g_npstates; i++)
g_pstate_info[i].total_time = 0;
for (i = 0; i < g_ncpus; i++)
g_cpu_power_states[i].dtrace_time = 0;
return (-1);
if (dtrace_aggregate_snap(dtp) != 0)
NULL) != 0)
/* Refresh the per-CPU speed snapshot; propagate its error code on failure */
if ((ret = pt_cpufreq_snapshot()) != 0) {
return (ret);
}
switch (g_op_mode) {
case PT_MODE_CPU:
break;
case PT_MODE_DEFAULT:
default:
for (i = 0; i < g_ncpus_observed; i++)
break;
}
return (0);
}
/*
 * NOTE(review): the line carrying this function's name and parameter list
 * is missing; the 'static void' return type and the use of 'duration' match
 * the pt_cpufreq_stat_account(double, uint_t) prototype, so this is
 * presumably that function -- confirm. The loop body is also missing and
 * the braces do not balance; restore the missing lines from version control
 * before building or modifying this function.
 */
static void
{
int i;
/*
 * 'duration' may be a negative value when we're using or forcing a
 * small interval, and the amount of time already accounted ends up
 * being larger than the former. Nothing is accounted in that case.
 */
if (duration < 0)
return;
for (i = 0; i < g_npstates; i++) {
}
}
}
/*
 * Take a snapshot of each CPU's speed by looking through the cpu_info kstats.
 *
 * Return values visible in this body: errno on an early failure, otherwise
 * the value left in 'ret'.
 *
 * NOTE(review): this body is incomplete as it stands -- the condition
 * guarding the first return, the per-CPU calls inside the switch, and the
 * statement controlled by the kstat_close() check are missing. Restore the
 * missing lines from version control before building or modifying this
 * function.
 */
static int
pt_cpufreq_snapshot(void)
{
int ret;
uint_t i;
return (errno);
/* Which CPUs are sampled depends on the operating mode */
switch (g_op_mode) {
case PT_MODE_CPU:
break;
case PT_MODE_DEFAULT:
default:
for (i = 0; i < g_ncpus_observed; i++)
break;
break;
}
if (kstat_close(kc) != 0)
return (ret);
}
/*
 * Per-CPU kstat reader. Return values, as indicated by the visible error
 * messages:
 *   0  success
 *   1  the 'cpu_info' kstat for the CPU could not be found
 *   2  the 'cpu_info' kstat for the CPU could not be read
 *   3  the 'current_clock_Hz' entry for the CPU could not be found
 *
 * NOTE(review): the line carrying this function's name and parameter list
 * is missing, as are the kstat calls and the tails of the pt_error()
 * statements. Restore the missing lines from version control before
 * building or modifying this function.
 */
static int
{
pt_error("couldn't find 'cpu_info' kstat for CPU %d\n while "
return (1);
}
pt_error("couldn't read 'cpu_info' kstat for CPU %d\n while "
return (2);
}
pt_error("couldn't find 'current_clock_Hz' kstat for CPU %d "
return (3);
}
return (0);
}
/*
 * DTrace aggregation walker that sorts through a snapshot of the
 * aggregation data collected during firings of the cpu-change-speed
 * probe.
 *
 * Every visible exit returns DTRACE_AGGWALK_NEXT.
 *
 * NOTE(review): the line carrying this function's name and parameter list
 * is missing (presumably pt_cpufreq_dtrace_walk, per the prototype), as are
 * the statements that extract the cpu/speed/time values from the record and
 * most of the accounting loop. Restore the missing lines from version
 * control before building or modifying this function.
 */
/*ARGSUSED*/
static int
{
int i;
/* LINTED - alignment */
/* LINTED - alignment */
/* LINTED - alignment */
/* A recorded speed of 0 is replaced with max_cpufreq */
if (speed == 0)
speed = max_cpufreq;
else
/*
* We have an aggregation record for "cpu" being at "speed"
* for an interval of "n" nanoseconds. The reported interval
* may exceed the powertop sampling interval, since we only
* notice during potentially infrequent firings of the
* "speed change" DTrace probe. In this case powertop would
* have already accounted for the portions of the interval
* that happened during prior powertop samplings, so subtract
* out time already accounted.
*/
for (i = 0; i < g_npstates; i++) {
if (cp->time_accounted > 0 &&
/* Clear the bookkeeping for time that was already accounted */
cp->time_accounted = 0;
cp->speed_accounted = 0;
} else {
return (DTRACE_AGGWALK_NEXT);
}
}
}
}
}
return (DTRACE_AGGWALK_NEXT);
}
/*
 * Checks if PM is enabled in /etc/power.conf, enabling if not
 *
 * When the checked status is 0 (documented on pt_cpufreq_check_pm() as
 * "CPUPM is not enabled"), a suggestion to enable CPU power management is
 * added via pt_sugg_add(). No other status value is handled in the visible
 * switch.
 *
 * NOTE(review): this body is incomplete as it stands -- the declaration and
 * assignment of 'ret' and the remaining arguments of the pt_sugg_add() call
 * are missing. Restore the missing lines from version control before
 * building or modifying this function.
 */
void
pt_cpufreq_suggest(void)
{
switch (ret) {
case 0:
pt_sugg_add("Suggestion: enable CPU power management by "
break;
}
}
/*
 * Checks /etc/power.conf and returns:
 *
 * 0 if CPUPM is not enabled
 * 1 if there's nothing for us to do because:
 * (a) the system does not support frequency scaling
 * (b) there's no power.conf.
 * 2 if CPUPM is enabled
 * 3 if the system is running in poll-mode, as opposed to event-mode
 *
 * Notice the ordering of the return values, they will be picked up and
 * switched upon ascendingly.
 *
 * NOTE(review): this body is incomplete as it stands -- the code that opens
 * and parses the file and the conditions guarding each return are missing,
 * and the braces do not balance. Restore the missing lines from version
 * control before building or modifying this function.
 */
static int
pt_cpufreq_check_pm(void)
{
/* Defaults to "CPUPM is not enabled" unless a later check says otherwise */
int ret = 0;
return (1);
return (2);
}
}
ret = 3;
}
return (ret);
}
/*
 * Used as a suggestion: runs three shell commands through system()
 * (cpupm_enable, cpupm_treshold and default_pmconf, all defined outside
 * this view) and then removes this suggestion with pt_sugg_remove(),
 * reporting an error if the removal returns 0.
 *
 * The results of the system() calls are deliberately discarded.
 *
 * NOTE(review): presumably the three commands enable CPU PM in
 * /etc/power.conf, set a threshold and apply the configuration -- confirm
 * against the command strings. The final pt_error() call is also cut off
 * (its trailing argument is missing); restore it from version control
 * before building or modifying this function.
 */
static void
pt_cpufreq_enable(void)
{
(void) system(cpupm_enable);
(void) system(cpupm_treshold);
(void) system(default_pmconf);
if (pt_sugg_remove(pt_cpufreq_enable) == 0)
pt_error("failed to remove a %s suggestion\n",
}