#! /usr/perl5/bin/perl
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
#
#
# Pg.pm provides object-oriented interface to the Solaris
# Processor Group kstats
#
# See comments in the end
#
package Sun::Solaris::Pg;
use strict;
use warnings;
use Sun::Solaris::Kstat;
use Carp;
use Errno;
use List::Util qw(max sum);
our $VERSION = '1.1';
#
# Currently the OS does not have the root PG and PGs constitute a forest of
# small trees. This module gathers all such trees under one root with ID zero.
# If the root is present already, we do not use faked root.
#
my $ROOT_ID = 0;
#
# PG_NO_PARENT means that kstats have PG parent ID and it is set to -1
# PG_PARENT_UNDEF means that kstats have no PG parent ID
#
use constant {
PG_NO_PARENT => -1,
PG_PARENT_UNDEF => -2,
};
#
# Sorting order between different sharing relationships. This order is used to
# break ties between PGs with the same number of CPUs. If there are two PGs with
# the same set of CPUs, the one with the higher weight will be the parent of the
# one with the lower weight.
#
my %relationships_order = (
'CPU_PM_Idle_Power_Domain' => 1,
'Integer_Pipeline' => 2,
'Cache' => 3,
'CPU_PM_Active_Power_Domain' => 4,
'Floating_Point_Unit' => 5,
'Data_Pipe_to_memory' => 6,
'Memory' => 7,
'Socket' => 8,
'System' => 9,
);
#
# Object interface to the library. These are methods that can be used by the
# module user.
#
#
# Create a new object representing PG
# All the heavy lifting is performed by _init function.
# This function performs all the Perl blessing magic.
#
# The new() method accepts arguments in the form of a hash. The following
# subarguments are supported:
#
# -cpudata # Collect per-CPU data from kstats if this is T
# -tags # Match PGs to physical relationships if this is T
# -swload # Collect software CPU load if this is T
# -retry # how many times to retry PG initialization when it fails
# -delay # Delay in seconds between retries
#
# The arguments are passed to _init().
#
sub new
{
my $class = shift;
my %args = @_;
my $retry_count = $args{-retry} || 0;
my $retry_delay = $args{-delay} || 1;
my $self = _init(@_);
#
# If PG initialization fails with EAGAIN error and the caller requested
# retries, retry initialization.
#
for (; !$self && ($! == &Errno::EAGAIN) && $retry_count;
$retry_count--) {
select(undef,undef,undef, $retry_delay);
$self = _init(@_);
}
if ($self) {
bless($self, $class) if defined($class);
bless($self) unless defined($class);
}
return ($self);
}
#
# Functions below use internal function _pg_get which returns PG hash reference
# corresponding to PG ID specified or 'undef' if the PG can't be found.
#
#
# All methods return 'undef' in scalar context and an empty list in list
# context when unrecoverable errors are detected.
#
#
# Return the root ID of PG hierarchy
#
sub root
{
scalar @_ == 1 or _usage("root(cookie)");
my $self = shift;
return unless $self->{PGTREE};
return ($ROOT_ID);
}
#
# Return list of all pgs numerically sorted In scalar context return number of
# PGs
#
sub all
{
scalar @_ == 1 or _usage("all(cookie)");
my $self = shift;
my $pgtree = $self->{PGTREE} or return;
my @ids = keys(%{$pgtree});
return (wantarray() ? _nsort(@ids) : scalar @ids);
}
#
# Return list of all pgs by walking the tree depth first.
#
sub all_depth_first
{
scalar @_ == 1 or _usage("all_depth_first(cookie)");
my $self = shift;
_walk_depth_first($self, $self->root());
}
#
# Return list of all pgs by walking the tree breadth first.
#
sub all_breadth_first
{
scalar @_ == 1 or _usage("all_breadth_first(cookie)");
my $self = shift;
_walk_breadth_first($self, $self->root());
}
#
# Return list of CPUs in the PG specified
# CPUs returned are numerically sorted
# In scalar context return number of CPUs
#
sub cpus
{
scalar @_ == 2 or _usage("cpus(cookie, pg)");
my $pg = _pg_get(shift, shift) or return;
my @cpus = @{$pg->{cpus}};
return (wantarray() ? _nsort(@cpus) : _collapse(@cpus));
}
#
# Return a parent for a given PG
# Returns undef if there is no parent
#
sub parent
{
scalar @_ == 2 or _usage("parent(cookie, pg)");
my $pg = _pg_get(shift, shift) or return;
my $parent = $pg->{parent};
return (defined($parent) && $parent >= 0 ? $parent : undef);
}
#
# Return list of children for a given PG
# In scalar context return list of children
#
sub children
{
scalar @_ == 2 or _usage("children(cookie, pg)");
my $pg = _pg_get(shift, shift) or return;
my $children = $pg->{children} or return;
my @children = @{$children};
return (wantarray() ? _nsort(@children) : scalar @children);
}
#
# Return sharing name for the PG
#
sub sh_name
{
scalar @_ == 2 or _usage("sh_name(cookie, pg)");
my $pg = _pg_get(shift, shift) or return;
return ($pg->{sh_name});
}
#
# Return T if specified PG ID is a leaf PG
#
sub is_leaf
{
scalar @_ == 2 or _usage("is_leaf(cookie, pg)");
my $pg = _pg_get(shift, shift) or return;
return ($pg->{is_leaf});
}
#
# Return leaf PGs
#
sub leaves
{
scalar @_ == 1 or _usage("leaves(cookie, pg)");
my $self = shift;
return (grep { is_leaf($self, $_) } $self->all());
}
#
# Update varying data in the snapshot
#
sub update
{
scalar @_ == 1 or _usage("update(cookie)");
my $self = shift;
my $ks = $self->{KSTAT};
$ks->update();
my $pgtree = $self->{PGTREE};
my $pg_info = $ks->{$self->{PG_MODULE}};
#
# Walk PG kstats and copy updated data from kstats to the snapshot
#
foreach my $id (keys %$pg_info) {
my $pg = $pgtree->{$id} or next;
my $pg_ks = _kstat_get_pg($pg_info, $id,
$self->{USE_OLD_KSTATS});
return unless $pg_ks;
#
# Update PG from kstats
#
$pg->{util} = $pg_ks->{hw_util};
$pg->{current_rate} = $pg_ks->{hw_util_rate};
$pg->{util_rate_max} = $pg_ks->{hw_util_rate_max};
$pg->{util_time_running} = $pg_ks->{hw_util_time_running};
$pg->{util_time_stopped} = $pg_ks->{hw_util_time_stopped};
$pg->{snaptime} = $pg_ks->{snaptime};
$pg->{generation} = $pg_ks->{generation};
}
#
# Update software load for each CPU
#
$self->{CPU_LOAD} = _get_sw_cpu_load($ks);
#
# Get hardware load per CPU
#
if ($self->{GET_CPU_DATA}) {
_get_hw_cpu_load($self);
}
return (1);
}
#
# Return list of physical tags for the given PG
#
sub tags
{
scalar @_ == 2 or _usage("tags(cookie, pg)");
my $pg = _pg_get(shift, shift) or return;
my $tags = $pg->{tags} or return;
my @tags = _uniq(@{$tags});
return (wantarray() ? @tags : join (',', @tags));
}
#
# Return list of sharing relationships in the snapshot Relationships are sorted
# by the level in the hierarchy If any PGs are given on the command line, only
# return sharing relationships for given PGs, but still keep them sorted.
#
sub sharing_relationships
{
scalar @_ or _usage("sharing_relationships(cookie, [pg, ...])");
my $self = shift;
my @pgs = $self->all_breadth_first();
if (scalar @_ > 0) {
#
# Caller specified PGs, remove any PGs not in caller's list
#
my %seen;
map { $seen{$_} = 1 } @_;
# Remove any PGs not provided by user
@pgs = grep { $seen{$_} } @pgs;
}
return (_uniq(map { $self->sh_name($_) } @pgs));
}
#
# Return PG generation number. If PG is specified in the argument, return its
# generation, otherwise return snapshot generation.
# Snapshot generation is calculated as the total of PG generations
#
sub generation
{
(scalar @_ == 1 || scalar @_ == 2) or _usage("generation(cookie, [pg])");
my $self = shift;
if (scalar @_ == 0) {
my @generations = map { $_->{generation} }
values %{$self->{PGTREE}};
return (sum(@generations));
} else {
my $id = shift;
my $pg = _pg_get($self, $id) or return;
return ($pg->{generation});
}
}
#
# Return level of PG in the tree, starting from root.
# PG level is cached in the $pg->{level} field.
#
sub level
{
scalar @_ == 2 or _usage("level(cookie, pg)");
my $self = shift;
my $pgid = shift;
my $pg = _pg_get($self, $pgid) or return;
return $pg->{level} if defined($pg->{level});
$pg->{level} = 0;
my $parent = _pg_get($self, $pg->{parent});
while ($parent) {
$pg->{level}++;
$parent = _pg_get($self, $parent->{parent});
}
return ($pg->{level});
}
#
# Return T if PG supports utilization We assume that utilization is supported by
# PG if it shows any non-zero time in util_time_running. It is possible that the
# same condition may be caused by cpustat(1) running ever since PG was created,
# but there is not much we can do about it.
#
sub has_utilization
{
scalar @_ == 2 or _usage("has_utilization(cookie, pg)");
my $pg = _pg_get(shift, shift) or return;
return ($pg->{util_time_running} != 0);
}
#
# Return utilization for the PG
# Utilization is a difference in utilization value between two snapshots.
# We can only compare utilization between PGs having the same generation ID.
#
sub utilization
{
scalar @_ == 3 or _usage("utilization(cookie, cookie1, pg");
my $c1 = shift;
my $c2 = shift;
my $id = shift;
#
# Since we have two cookies, update capacity in both
#
_capacity_update($c1, $c2, $id);
my $pg1 = _pg_get($c1, $id) or return;
my $pg2 = _pg_get($c2, $id) or return;
#
# Nothing to return if one of the utilizations wasn't measured
#
return unless ($pg1->{util_time_running} && $pg2->{util_time_running});
#
# Verify generation IDs
#
return unless $pg1->{generation} eq $pg2->{generation};
my $u1 = $pg1->{util};
my $u2 = $pg2->{util};
return unless defined ($u1) && defined ($u2);
return (abs($u2 - $u1));
}
#
# Return an estimate of PG capacity Capacity is calculated as the maximum of
# observed utilization expressed in units per second or maximum CPU frequency
# for all CPUs.
#
# We store capacity per sharing relationship, assuming that the same sharing has
# the same capacity. This may not be true for heterogeneous systems.
#
sub capacity
{
scalar @_ == 2 or _usage("capacity(cookie, pg");
my $self = shift;
my $pgid = shift;
my $pg = _pg_get($self, $pgid) or return;
my $shname = $pg->{sh_name} or return;
return (max($self->{MAX_FREQUENCY}, $self->{CAPACITY}->{$shname}));
}
#
# Return accuracy of utilization calculation between two snapshots The accuracy
# is determined based on the total time spent running and not running the
# counters. If T1 is the time counters were running during the period and T2 is
# the time they were turned off, the accuracy is T1 / (T1 + T2), expressed in
# percentages.
#
sub accuracy
{
scalar @_ == 3 or _usage("accuracy(cookie, cookie1, pg)");
my $c1 = shift;
my $c2 = shift;
my $id = shift;
my $trun;
my $tstop;
my $pg1 = _pg_get($c1, $id) or return;
my $pg2 = _pg_get($c2, $id) or return;
# Both PGs should have the same generation
return unless $pg1->{generation} eq $pg2->{generation};
#
# Get time spent with running and stopped counters
#
$trun = abs($pg2->{util_time_running} -
$pg1->{util_time_running});
$tstop = abs($pg2->{util_time_stopped} -
$pg1->{util_time_stopped});
my $total = $trun + $tstop;
#
# Calculate accuracy as percentage
#
my $accuracy = $total ? ($trun * 100) / $total : 0;
$accuracy = int($accuracy + 0.5);
$accuracy = 100 if $accuracy > 100;
return ($accuracy);
}
#
# Return time difference in seconds between two snapshots
#
sub tdelta
{
scalar @_ == 3 or _usage("tdelta(cookie, cookie1, pg)");
my $c1 = shift;
my $c2 = shift;
my $id = shift;
my $pg1 = _pg_get($c1, $id) or return;
my $pg2 = _pg_get($c2, $id) or return;
return unless $pg1->{generation} eq $pg2->{generation};
my $t1 = $pg1->{snaptime};
my $t2 = $pg2->{snaptime};
my $delta = abs($t1 - $t2);
return ($delta);
}
#
# Return software utilization between two snapshots
# In scalar context return software load as percentage.
# In list context return a list (USER, SYSTEM, IDLE, SWLOAD)
# All loads are returned as percentages
#
sub sw_utilization
{
scalar @_ == 3 or _usage("tdelta(cookie, cookie1, pg)");
my $c1 = shift;
my $c2 = shift;
my $id = shift;
my $pg1 = _pg_get($c1, $id) or return;
my $pg2 = _pg_get($c2, $id) or return;
return unless $pg1->{generation} eq $pg2->{generation};
my @cpus = $c1->cpus($id);
my $load1 = $c1->{CPU_LOAD};
my $load2 = $c2->{CPU_LOAD};
my $idle = 0;
my $user = 0;
my $sys = 0;
my $total = 0;
my $swload = 0;
foreach my $cpu (@cpus) {
my $ld1 = $load1->{$cpu};
my $ld2 = $load2->{$cpu};
next unless $ld1 && $ld2;
$idle += $ld2->{cpu_idle} - $ld1->{cpu_idle};
$user += $ld2->{cpu_user} - $ld1->{cpu_user};
$sys += $ld2->{cpu_sys} - $ld1->{cpu_sys};
}
$total = $idle + $user + $sys;
# Prevent division by zero
$total = 1 unless $total;
$swload = ($user + $sys) * 100 / $total;
$idle = $idle * 100 / $total;
$user = $user * 100 / $total;
$sys = $sys * 100 / $total;
return (wantarray() ? ($user, $sys, $idle, $swload) : $swload);
}
#
# Return utilization for the PG for a given CPU
# Utilization is a difference in utilization value between two snapshots.
# We can only compare utilization between PGs having the same generation ID.
#
sub cpu_utilization
{
scalar @_ == 4 or _usage("utilization(cookie, cookie1, pg, cpu");
my $c1 = shift;
my $c2 = shift;
my $id = shift;
my $cpu = shift;
my $idle = 0;
my $user = 0;
my $sys = 0;
my $swtotal = 0;
my $swload = 0;
#
# Since we have two cookies, update capacity in both
#
_capacity_update($c1, $c2, $id);
my $pg1 = _pg_get($c1, $id) or return;
my $pg2 = _pg_get($c2, $id) or return;
#
# Nothing to return if one of the utilizations wasn't measured
#
return unless ($pg1->{util_time_running} && $pg2->{util_time_running});
#
# Nothing to return if CPU data is missing
#
return unless $pg1->{cpudata} && $pg2->{cpudata};
#
# Verify generation IDs
#
return unless $pg1->{generation} eq $pg2->{generation};
#
# Get data for the given CPU
#
my $cpudata1 = $pg1->{cpudata}->{$cpu};
my $cpudata2 = $pg2->{cpudata}->{$cpu};
return unless $cpudata1 && $cpudata2;
return unless $cpudata1->{generation} == $cpudata2->{generation};
my $u1 = $cpudata1->{util};
my $u2 = $cpudata2->{util};
return unless defined ($u1) && defined ($u2);
my $hw_utilization = abs ($u1 - $u2);
#
# Get time spent with running and stopped counters
#
my $trun = abs($cpudata1->{util_time_running} -
$cpudata2->{util_time_running});
my $tstop = abs($cpudata1->{util_time_stopped} -
$cpudata2->{util_time_stopped});
my $total = $trun + $tstop;
#
# Calculate accuracy as percentage
#
my $accuracy = $total ? ($trun * 100) / $total : 0;
$accuracy = int($accuracy + 0.5);
$accuracy = 100 if $accuracy > 100;
my $t1 = $cpudata1->{snaptime};
my $t2 = $cpudata2->{snaptime};
my $tdelta = abs ($t1 - $t2);
my $shname = $pg2->{sh_name} or return;
my $capacity = max($c2->{MAX_FREQUENCY}, $c2->{CAPACITY}->{$shname});
my $utilization = $hw_utilization / $tdelta;
$capacity = $utilization unless $capacity;
$utilization /= $capacity;
$utilization *= 100;
my $ld1 = $c1->{CPU_LOAD}->{$cpu};
my $ld2 = $c2->{CPU_LOAD}->{$cpu};
if ($ld1 && $ld2) {
$idle = $ld2->{cpu_idle} - $ld1->{cpu_idle};
$user = $ld2->{cpu_user} - $ld1->{cpu_user};
$sys = $ld2->{cpu_sys} - $ld1->{cpu_sys};
$swtotal = $idle + $user + $sys;
# Prevent division by zero
$swtotal = 1 unless $swtotal;
$swload = ($user + $sys) * 100 / $swtotal;
$idle = $idle * 100 / $swtotal;
$user = $user * 100 / $swtotal;
$sys = $sys * 100 / $swtotal;
}
return (wantarray() ?
($utilization, $accuracy, $hw_utilization,
$swload, $user, $sys, $idle) :
$utilization);
}
#
# online_cpus(kstat)
# Return list of on-line CPUs
#
sub online_cpus
{
scalar @_ == 1 or _usage("online_cpus(cookie)");
my $self = shift or return;
my $ks = $self->{KSTAT} or return;
my $cpu_info = $ks->{cpu_info} or return;
my @cpus = grep {
my $cp = $cpu_info->{$_}->{"cpu_info$_"};
my $state = $cp->{state};
$state eq 'on-line' || $state eq 'no-intr';
} keys %{$cpu_info};
return (wantarray() ? @cpus : _nsort(@cpus));
}
#
# Support methods
#
# The following methods are not PG specific but are generally useful for PG
# interface consumers
#
#
# Sort the list numerically
#
sub nsort
{
scalar @_ > 0 or _usage("nsort(cookie, val, ...)");
shift;
return (_nsort(@_));
}
#
# Return the input list with duplicates removed.
# Should be used in list context
#
sub uniq
{
scalar @_ > 0 or _usage("uniq(cookie, val, ...)");
shift;
return (_uniq(@_));
}
#
# Sort list numerically and remove duplicates
# Should be called in list context
#
sub uniqsort
{
scalar @_ > 0 or _usage("uniqsort(cookie, val, ...)");
shift;
return (_uniqsort(@_));
}
#
# Expand all arguments and present them as a numerically sorted list
# x,y is expanded as (x y)
# 1-3 ranges are expandes as (1 2 3)
#
sub expand
{
scalar @_ > 0 or _usage("expand(cookie, val, ...)");
shift;
return (_uniqsort(map { _expand($_) } @_));
}
#
# Consolidate consecutive ids as start-end
# Input: list of ids
# Output: string with space-sepated cpu values with ranges
# collapsed as x-y
#
sub id_collapse
{
scalar @_ > 0 or _usage("collapse(cookie, val, ...)");
shift;
return _collapse(@_);
}
#
# Return elements of the second list not present in the first list. Both lists
# are passed by reference.
#
sub set_subtract
{
scalar @_ == 3 or _usage("set_subtract(cookie, left, right)");
shift;
return (_set_subtract(@_));
}
#
# Return the intersection of two lists passed by reference
# Convert the first list to a hash with seen entries marked as 1-values
# Then grep only elements present in the first list from the second list.
# As a little optimization, use the shorter list to build a hash.
#
sub intersect
{
scalar @_ == 3 or _usage("intersect(cookie, left, right)");
shift;
return (_set_intersect(@_));
}
#
# Return elements of the second list not present in the first list. Both lists
# are passed by reference.
#
sub _set_subtract
{
my ($left, $right) = @_;
my %seen; # Set to 1 for everything in the first list
# Create a hash indexed by elements in @left with ones as a value.
map { $seen{$_} = 1 } @$left;
# Find members of @right present in @left
return (grep { ! $seen{$_} } @$right);
}
#
# END OF PUBLIC INTERFACE
#
#
# INTERNAL FUNCTIONS
#
#
# _usage(): print error message and terminate the program.
#
sub _usage
{
my $msg = shift;
Carp::croak "Usage: Sun::Solaris::Pg::$msg";
}
#
# Sort the list numerically
# Should be called in list context
#
sub _nsort
{
return (sort { $a <=> $b } @_);
}
#
# Return the input list with duplicates removed.
# Should be used in list context
#
sub _uniq
{
my %seen;
return (grep { ++$seen{$_} == 1 } @_);
}
#
# Sort list numerically and remove duplicates
# Should be called in list context
#
sub _uniqsort
{
return (sort { $a <=> $b } _uniq(@_));
}
# Get PG from the snapshot by id
sub _pg_get
{
my $self = shift;
my $pgid = shift;
return unless defined $pgid;
my $pgtree = $self->{PGTREE} or return;
return ($pgtree->{$pgid});
}
#
# Copy data from kstat representation to our representation
# Arguments:
# PG kstat
# Reference to the list of CPUs.
# Any CPUs in the PG kstat not present in the CPU list are ignored.
#
sub _pg_create_from_kstat
{
my $pg_ks = shift;
my $all_cpus = shift;
my %all_cpus;
my $pg = ();
#
# Mark CPUs available
#
map { $all_cpus{$_}++ } @$all_cpus;
return unless $pg_ks;
#
# Convert CPU list in the kstat from x-y,z form to the proper list
#
my @cpus = _expand($pg_ks->{cpus});
#
# Remove any CPUs not present in the arguments
#
@cpus = grep { $all_cpus{$_} } @cpus;
#
# Do not create PG unless it has any CPUs
#
return unless scalar @cpus;
#
# Copy data to the $pg structure
#
$pg->{ncpus} = scalar @cpus;
$pg->{cpus} = \@cpus;
$pg->{id} = defined($pg_ks->{pg_id}) ? $pg_ks->{pg_id} : $pg_ks->{id};
$pg->{util} = $pg_ks->{hw_util};
$pg->{current_rate} = $pg_ks->{hw_util_rate};
$pg->{util_rate_max} = $pg_ks->{hw_util_rate_max};
$pg->{util_time_running} = $pg_ks->{hw_util_time_running};
$pg->{util_time_stopped} = $pg_ks->{hw_util_time_stopped};
$pg->{snaptime} = $pg_ks->{snaptime};
$pg->{generation} = $pg_ks->{generation};
$pg->{sh_name} = $pg_ks->{relationship} || $pg_ks->{sharing_relation};
$pg->{parent} = $pg_ks->{parent_pg_id};
$pg->{parent} = PG_PARENT_UNDEF unless defined $pg->{parent};
#
# Replace spaces with underscores in sharing names
#
$pg->{sh_name} =~ s/ /_/g;
$pg->{is_leaf} = 1;
return $pg;
}
#
# Create fake root PG with all CPUs
# Arguments: list of CPUs
#
sub _pg_create_root
{
my $pg = ();
my @cpus = @_;
$pg->{id} = $ROOT_ID;
$pg->{ncpus} = scalar @cpus;
$pg->{util} = 0;
$pg->{current_rate} = 0;
$pg->{util_rate_max} = 0;
$pg->{util_time_running} = 0;
$pg->{util_time_stopped} = 0;
$pg->{snaptime} = 0;
$pg->{generation} = 0;
$pg->{sh_name} = 'System';
$pg->{is_leaf} = 0;
$pg->{cpus} = \@cpus;
$pg->{parent} = PG_NO_PARENT;
return ($pg);
}
#
# _pg_all_from_kstats(SNAPSHOT)
# Extract all PG information from kstats
#
sub _pg_all_from_kstats
{
my $self = shift;
my $ks = $self->{KSTAT};
my @all_cpus = @{$self->{CPUS}};
return unless $ks;
my $pgtree = ();
my $pg_info = $ks->{$self->{PG_MODULE}};
#
# Walk all PG kstats and copy them to $pgtree->{$id}
#
foreach my $id (keys %$pg_info) {
my $pg_ks = _kstat_get_pg($pg_info, $id,
$self->{USE_OLD_KSTATS});
next unless $pg_ks;
my $pg = _pg_create_from_kstat($pg_ks, \@all_cpus);
$pgtree->{$id} = $pg if $pg;
}
#
# OS does not have root PG, so create one.
#
if (!$pgtree->{$ROOT_ID}) {
$pgtree->{$ROOT_ID} = _pg_create_root (@all_cpus);
}
#
# Construct parent-child relationships between PGs
#
#
# Get list of PGs sorted by number of CPUs
# If two PGs have the same number of CPUs, sort by relationship order.
#
my @lineage = sort {
$a->{ncpus} <=> $b->{ncpus} ||
_relationship_order($a->{sh_name}) <=>
_relationship_order($b->{sh_name})
} values %$pgtree;
#
# For each PG in the lineage discover its parent if it doesn't have one.
#
for (my $i = 0; $i < scalar @lineage; $i++) {
my $pg = $lineage[$i];
#
# Ignore PGs which already have parent in kstats
#
my $parent = $pg->{parent};
next if ($parent >= PG_NO_PARENT);
my $ncpus = $pg->{ncpus};
my @cpus = @{$pg->{cpus}};
#
# Walk the lineage, ignoring any CPUs with the same number of
# CPUs
for (my $j = $i + 1; $j < scalar @lineage; $j++) {
my $pg1 = $lineage[$j];
my @parent_cpus = @{$pg1->{cpus}};
if (_is_subset(\@cpus, \@parent_cpus)) {
$pg->{parent} = $pg1->{id};
last;
}
}
}
#
# Find all top-level PGs and put them under $root
#
foreach my $pgid (keys %$pgtree) {
next if $pgid == $ROOT_ID;
my $pg = $pgtree->{$pgid};
$pg->{parent} = $ROOT_ID unless $pg->{parent} >= 0;
}
#
# Now that we know parents, for each parent add all direct children to
# their parent sets
#
foreach my $pg (@lineage) {
my $parentid = $pg->{parent};
next unless defined $parentid;
my $parent = $pgtree->{$parentid};
push (@{$parent->{children}}, $pg->{id});
}
return ($pgtree);
}
#
# Read kstats and initialize PG object
# Collect basic information about cmt_pg
# Add list of children and list of CPUs
# Returns the hash reference indexed by pg id
#
# The _init() function accepts arguments in the form of a hash. The following
# subarguments are supported:
#
# -cpudata # Collect per-CPU data from kstats if this is T
# -tags # Match PGs to physical relationships if this is T
# -swload # Collect software CPU load if this is T
sub _init
{
my $ks = Sun::Solaris::Kstat->new(strip_strings => 1);
return unless $ks;
my %args = @_;
my $get_cpu_data = $args{-cpudata};
my $get_tags = $args{-tags};
my $get_swload = $args{-swload};
my $self;
my $use_old_kstat_names = scalar(grep {/^pg_hw_perf/ } keys (%$ks)) == 0;
my @frequencies;
$self->{MAX_FREQUENCY} = 0;
$self->{PG_MODULE} = $use_old_kstat_names ? 'pg' : 'pg_hw_perf';
$self->{PG_CPU_MODULE} = $use_old_kstat_names ?
'pg_cpu' : 'pg_hw_perf_cpu';
$self->{USE_OLD_KSTATS} = $use_old_kstat_names;
$get_cpu_data = 0 unless scalar(grep {/^$self->{PG_CPU_MODULE}/ }
keys (%$ks));
# Get list of PG-related kstats
my $pg_keys = $use_old_kstat_names ? 'pg' : 'pg_hw';
if (scalar(grep { /^$pg_keys/ } keys (%$ks)) == 0) {
if (exists(&Errno::ENOTSUPP)) {
$! = &Errno::ENOTSUPP;
} else {
$! = 48;
}
return;
}
#
# Mapping of cores and chips to CPUs
#
my $hw_mapping;
#
# Get list of all CPUs
#
my $cpu_info = $ks->{cpu_info};
#
# @all-cpus is a list of all cpus
#
my @all_cpus = keys %$cpu_info;
#
# Save list of all CPUs in the snapshot
#
$self->{CPUS} = \@all_cpus;
#
# Find CPUs for each socket and chip
# Also while we scan CPU kstats, get maximum frequency of each CPU.
#
foreach my $id (@all_cpus) {
my $ci = $cpu_info->{$id}->{"cpu_info$id"};
next unless $ci;
my $core_id = $ci->{core_id};
my $chip_id = $ci->{chip_id};
push(@{$hw_mapping->{core}->{$core_id}}, $id)
if defined $core_id;
push(@{$hw_mapping->{chip}->{$chip_id}}, $id)
if defined $chip_id;
# Read CPU frequencies separated by commas
my $freqs = $ci->{supported_frequencies_Hz};
my $max_freq = max(split(/:/, $freqs));
# Calculate maximum frequency for the snapshot.
$self->{MAX_FREQUENCY} = $max_freq if
$self->{MAX_FREQUENCY} < $max_freq;
}
$self->{KSTAT} = $ks;
#
# Convert kstats to PG tree
#
my $pgtree = _pg_all_from_kstats($self);
$self->{PGTREE} = $pgtree;
#
# Find capacity estimate per sharing relationship
#
foreach my $pgid (keys %$pgtree) {
my $pg = $pgtree->{$pgid};
my $shname = $pg->{sh_name};
my $max_rate = $pg->{util_rate_max};
$self->{CAPACITY}->{$shname} = $max_rate if
!$self->{CAPACITY}->{$shname} ||
$self->{CAPACITY}->{$shname} < $max_rate;
}
if ($get_tags) {
#
# Walk all PGs and mark all PGs that have corresponding hardware
# entities (system, chips, cores).
#
foreach my $pgid (keys %$pgtree) {
my $pg = $pgtree->{$pgid};
my @cpus = @{$pg->{cpus}};
next unless scalar @cpus > 1;
if (_set_equal (\@cpus, \@all_cpus)) {
#
# PG has all CPUs in the system.
#
push (@{$pg->{tags}}, 'system');
}
foreach my $name ('core', 'chip') {
my $hwdata = $hw_mapping->{$name};
foreach my $id (keys %$hwdata) {
# CPUs for this entity
my @hw_cpus = @{$hwdata->{$id}};
if (_set_equal (\@cpus, \@hw_cpus)) {
#
# PG has exactly the same CPUs
#
push (@{$pg->{tags}}, $name);
}
}
}
}
}
#
# Save software load for each CPU
#
if ($get_swload) {
$self->{CPU_LOAD} = _get_sw_cpu_load($ks);
}
#
# Collect per-CPU utilization data if requested
#
if ($get_cpu_data) {
_get_hw_cpu_load($self);
}
$self->{GET_CPU_DATA} = $get_cpu_data;
#
# Verify that in the end we have the same PG generation for each PG
#
if (! _same_generation($self)) {
$! = &Errno::EAGAIN;
return;
}
return ($self);
}
#
# Verify that topology is the same as at the time snapshot was created
#
sub _same_generation
{
my $self = shift;
my $pgtree = $self->{PGTREE} or return;
return (0) unless $self;
my $ks = $self->{KSTAT};
$ks->update();
my $pg_info = $ks->{$self->{PG_MODULE}};
foreach my $id (keys %$pg_info) {
my $pg = $pgtree->{$id} or next;
my $pg_ks = _kstat_get_pg($pg_info, $id,
$self->{USE_OLD_KSTATS});
return unless $pg_ks;
return (0) unless $pg->{generation} == $pg_ks->{generation};
}
return (1);
}
#
# Update capacity for both PGs
#
sub _capacity_update
{
my $c1 = shift;
my $c2 = shift;
my $pgtree1 = $c1->{PGTREE};
my $pgtree2 = $c2->{PGTREE};
foreach my $pgid (keys %$pgtree1) {
my $pg1 = $pgtree1->{$pgid};
my $pg2 = $pgtree2->{$pgid};
next unless $pg1 && $pg2;
next unless $pg1->{generation} != $pg2->{generation};
my $shname1 = $pg1->{sh_name};
my $shname2 = $pg2->{sh_name};
next unless $shname1 eq $shname2;
my $max_rate = max($pg1->{util_rate_max}, $pg2->{util_rate_max});
my $utilization = abs($pg1->{util} - $pg2->{util});
my $tdelta = abs($pg1->{snaptime} - $pg2->{snaptime});
$utilization /= $tdelta if $utilization && $tdelta;
$max_rate = $utilization if
$utilization && $max_rate < $utilization;
$c1->{CAPACITY}->{$shname1} = $max_rate if
!$c1->{CAPACITY}->{$shname1} ||
!$c1->{CAPACITY}->{$shname1} < $max_rate;
$c2->{CAPACITY}->{$shname2} = $max_rate if
!$c2->{CAPACITY}->{$shname2} ||
!$c2->{CAPACITY}->{$shname2} < $max_rate;
}
}
#
# Return list of PGs breadth first
#
sub _walk_depth_first
{
my $p = shift;
# Nothing to do if list is empty
return unless scalar (@_);
return (map { ($_, _walk_depth_first ($p, $p->children($_))) } @_);
}
#
# Return list of PGs breadth first
#
sub _walk_breadth_first
{
my $p = shift;
# Nothing to do if list is empty
return unless scalar (@_);
return (@_, _walk_breadth_first($p, map { $p->children($_) } @_));
}
#
# Given the kstat reference (already hashed by module name) and PG ID return the
# corresponding kstat.
#
sub _kstat_get_pg
{
my $mod = shift;
my $pgid = shift;
my $use_old_kstats = shift;
my $id_field = $use_old_kstats ? 'id' : 'pg_id';
return ($mod->{$pgid}->{hardware}) if $use_old_kstats;
my @instances = grep { $_->{$id_field} == $pgid }
values(%{$mod->{$pgid}});
return ($instances[0]);
}
######################################################################
# Set routines
#######################################################################
#
# Return T if one list contains all the elements of another list.
# All lists are passed by reference
#
sub _is_subset
{
my ($left, $right) = @_;
my %seen; # Set to 1 for everything in the first list
# Put the shortest list in $left
Carp::croak "invalid left argument" unless ref ($left) eq 'ARRAY';
Carp::croak "invalid right argument" unless ref ($right) eq 'ARRAY';
# Create a hash indexed by elements in @right with ones as a value.
map { $seen{$_} = 1 } @$right;
# Find members of @left not present in @right
my @extra = grep { !$seen{$_} } @$left;
return (!scalar(@extra));
}
sub _is_member
{
my $set = shift;
my $element = shift;
my %seen;
map { $seen{$_} = 1 } @$set;
return ($seen{$element});
}
#
# Return T if C1 and C2 contain the same elements
#
sub _set_equal
{
my $c1 = shift;
my $c2 = shift;
return 0 unless scalar @$c1 == scalar @$c2;
return (_is_subset($c1, $c2) && _is_subset($c2, $c1));
}
#
# Return the intersection of two lists passed by reference
# Convert the first list to a hash with seen entries marked as 1-values
# Then grep only elements present in the first list from the second list.
# As a little optimization, use the shorter list to build a hash.
#
sub _set_intersect
{
my ($left, $right) = @_;
my %seen; # Set to 1 for everything in the first list
# Put the shortest list in $left
scalar @$left <= scalar @$right or ($right, $left) = ($left, $right);
# Create a hash indexed by elements in @left with ones as a value.
map { $seen{$_} = 1 } @$left;
# Find members of @right present in @left
return (grep { $seen{$_} } @$right);
}
#
# Expand start-end into the list of values
# Input: string containing a single numeric ID or x-y range
# Output: single value or a list of values
# Ranges with start being more than end are inverted
#
sub _expand
{
# Skip the first argument if it is the object reference
shift if ref $@[0] eq 'HASH';
my $arg = shift;
return unless defined $arg;
my @args = split /,/, $arg;
return map { _expand($_) } @args if scalar @args > 1;
$arg = shift @args;
return unless defined $arg;
if ($arg =~ m/^\d+$/) {
# single number
return ($arg);
} elsif ($arg =~ m/^(\d+)\-(\d+)$/) {
my ($start, $end) = ($1, $2); # $start-$end
# Reverse the interval if start > end
($start, $end) = ($end, $start) if $start > $end;
return ($start .. $end);
} else {
return $arg;
}
return;
}
#
# Consolidate consecutive ids as start-end
# Input: list of ids
# Output: string with space-sepated cpu values with ranges
# collapsed as x-y
#
sub _collapse
{
return ('') unless @_;
my @args = _uniqsort(@_);
my $start = shift(@args);
my $result = '';
my $end = $start; # Initial range consists of the first element
foreach my $el (@args) {
if (!$el =~ /^\d+$/) {
$result = "$result $el";
$end = $el;
} elsif ($el == ($end + 1)) {
#
# Got consecutive ID, so extend end of range without
# printing anything since the range may extend further
#
$end = $el;
} else {
#
# Next ID is not consecutive, so print IDs gotten so
# far.
#
if ($end > $start + 1) { # range
$result = "$result $start-$end";
} elsif ($end > $start) { # different values
$result = "$result $start $end";
} else { # same value
$result = "$result $start";
}
# Try finding consecutive range starting from this ID
$start = $end = $el;
}
}
# Print last ID(s)
if (! ($end =~ /^\d+$/)) {
$result = "$result $end";
} elsif ($end > $start + 1) {
$result = "$result $start-$end";
} elsif ($end > $start) {
$result = "$result $start $end";
} else {
$result = "$result $start";
}
# Remove any spaces in the beginning
$result =~ s/^\s+//;
return ($result);
}
#
# get relationship order from relationship name.
# return 0 for all unknown names.
#
sub _relationship_order
{
my $name = shift;
return ($relationships_order{$name} || 0);
}
#
# Get software load for each CPU from kstats
# Argument: kstat reference
# Returns: reference to the hash with
# cpu_idle, cpu_user, cpu_sys keys.
#
sub _get_sw_cpu_load
{
my $ks = shift or return;
my $loads;
my $sys_ks = $ks->{cpu};
foreach my $cpu (keys %$sys_ks) {
my $sys = $sys_ks->{$cpu}->{sys};
$loads->{$cpu}->{cpu_idle} = $sys->{cpu_ticks_idle};
$loads->{$cpu}->{cpu_user} = $sys->{cpu_ticks_user};
$loads->{$cpu}->{cpu_sys} = $sys->{cpu_ticks_kernel};
}
return ($loads);
}
#
# Get software load for each CPU from kstats
# Arguments:
# pgtree reference
# kstat reference
#
# Returns: nothing
# Stores CPU load in the $pg->{cpudata} hash for each PG
#
sub _get_hw_cpu_load
{
my $self = shift;
my $pgtree = $self->{PGTREE};
my $ks = $self->{KSTAT};
my $pg_cpu_ks = $ks->{$self->{PG_CPU_MODULE}};
foreach my $pgid (keys %$pgtree) {
my $pg = $pgtree->{$pgid};
my @cpus = @{$pg->{cpus}};
my $cpu;
my $pg_id;
foreach my $cpu (keys %$pg_cpu_ks) {
next unless _is_member(\@cpus, $cpu);
my $cpu_hw_data = $pg_cpu_ks->{$cpu};
foreach my $hw (keys %$cpu_hw_data) {
my $cpudata = $cpu_hw_data->{$hw};
#
# Only consider information for this PG
#
next unless $cpudata->{pg_id} == $pgid;
$pg->{cpudata}->{$cpu}->{generation} =
$cpudata->{generation};
$pg->{cpudata}->{$cpu}->{util} =
$cpudata->{hw_util};
$pg->{cpudata}->{$cpu}->{util_time_running} =
$cpudata->{hw_util_time_running};
$pg->{cpudata}->{$cpu}->{util_time_stopped} =
$cpudata->{hw_util_time_stopped};
$pg->{cpudata}->{$cpu}->{snaptime} =
$cpudata->{snaptime};
}
}
}
}
1;
__END__
#
# The information about PG hierarchy is contained in a object return by the
# new() method.
#
# This module can deal with old PG kstats that have 'pg' and 'pg_cpu' as module
# names as well as new PG kstats which use 'pg_hw_perf' and ''pg_hw_perf_cpu' as
# the module name.
#
# The object contains the following fields:
#
# CPUS List of all CPUs present.
# CAPACITY Estimate of capacity for each sharing
# PGTREE The PG tree. See below for the tree representation.
#
# PG_MODULE Module name for the PG kstats. It is either 'pg' for
# old style kstats, or 'pg_hw_perf' for new style kstats.
#
# MAX_FREQUENCY Maximum CPU frequency
# USE_OLD_KSTATS True if we are dealing with old style kstats
# KSTAT The kstat object used to generate this hierarchy.
#
# The PG tree is represented as a hash table indexed by PG ID. Each element of
# the table is the hash reference with the following fields:
#
# children Reference to the list of children PG IDs
# cpus Reference to the list of cpu IDs in the PG
# current_rate Current utilization rate
# generation PG generation
# id PG id
# ncpus number of CPUs in the PG
# parent PG parent id, or -1 if there is none.
# sh_name Sharing name
# snaptime Snapshot time
# util Hardware utilization
# util_rate_max Maximum utilization rate
# util_time_running Time (in nanoseconds) when utilization data is collected
# util_time_stopped Time when utilization data is not collected
#
# The fields (with the exception of 'children') are a copy of the data from
# kstats.
#
# The PG hierarchy in the kernel does not have the root PG. We simulate the root
# (System) PG which is the parent of top level PGs in the system. This PG always
# has ID 0.
#