/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* These functions implement the process of commitment for a pool
* configuration. This process can be described as taking instructions
* from a static configuration file and using the information about
* the target system contained in the dynamic configuration to make
* decisions about how best to allocate resources to meet the
* constraints specified in the static configuration file.
*
* Mechanically, this process relies upon ordering the individual
* components of the file and stepping through the lists of components
* and taking actions depending on their type and which file they are
* part of.
*
* Configuration components can be broken down into different types
* which are then treated according to the following table:
*
* Element Type Action
* system || pool ||
* res_comp || res_agg If the element is a required element, then create or
* update it (don't destroy required elements in the
* static configuration) otherwise manipulate the
* dynamic configuration to create, destroy or update
* the element on the system.
* comp Create, destroy or update the static configuration
* component.
*
* The treatment of the different elements reflects the fact that all
* elements other than comp are configurable and thus libpool can
* create, destroy and modify these elements at will. comp elements
* reflect the disposition of the system, these elements can be moved
* around but they can't be created or destroyed in the dynamic
* configuration in the commit process. comp elements can be created
* and destroyed in the static configuration file as a result of a
* commit operation, since it's possible for a comp to not appear in
* the dynamic configuration. For instance, if the static
* configuration file was created on a different machine or after a DR
* operation which has removed or added components.
*
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <limits.h>
#include <unistd.h>
#include <pool.h>
#include "pool_internal.h"
#include "pool_impl.h"
/*
* Two-argument minimum and maximum. These are plain macros, so each
* argument may be evaluated twice: do not pass expressions with side
* effects.
*/
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
/* Allocation method value tested by the switch on the allocation method. */
#define POA_IMPORTANCE_NUM 0
/*
* This resource specific structure is used to determine allocation of resources
* during resource set allocation. Each set will receive its min, plus
* some number of dealt resources based on the global allocation policy.
*/
typedef struct res_info {
/* The signed quantity of resources */
/* to transfer into or out of this */
/* resource set */
/* + transfer: transfer resources out */
/* - transfer: transfer resources in */
} res_info_t;
/*
* diff_and_fix operations
*/
static int commit_delete(pool_elem_t *);
/*
* configuration commit processing
*/
pool_conf_t *);
static int process_lists(int, pool_conf_t *,
pool_conf_t *, int);
static int share_resources(pool_conf_t *);
static int resource_allocate(const char *, pool_resource_t **,
uint_t);
static int resource_compare_by_descending_importance(const void *,
const void *);
static int compute_size_to_transfer(const void *, const void *);
static int add_importance_props(pool_conf_t *);
static int remove_importance_props(pool_conf_t *);
const char *, pool_value_t *, void *);
const char *, pool_value_t *, void *);
/*
* commit_create() is used to create a configuration element upon the
* system. Since only pools and resources actually need to perform any
* action, other elements are ignored as a no-op.
*/
static int
{
const char *res_type;
char *name;
/* The action taken depends on the class of the source element. */
switch (pool_elem_class(src)) {
case PEC_SYSTEM: /* NO-OP */
break;
case PEC_POOL:
return (PO_FAIL);
}
/*
* Now copy the properties from the original pool to the
* new one
*/
clone_element) != PO_SUCCESS)
return (PO_FAIL);
/*
* Add a pointer to the src element which can be
* updated with a sys_id when the sys_id is allocated
* to the created element.
*/
break;
case PEC_RES_COMP:
case PEC_RES_AGG:
NULL) {
return (PO_FAIL);
}
/*
* Need to do some ordering of property updates: in one
* case update the smin first, else update the max first.
*/
return (PO_FAIL);
&val) != PO_SUCCESS)
return (PO_FAIL);
} else {
&val) != PO_SUCCESS)
return (PO_FAIL);
}
/*
* Now copy the properties from the original resource
* to the new one
*/
clone_element) != PO_SUCCESS)
return (PO_FAIL);
/*
* Add a pointer to the src element which can be
* updated with a sys_id when the sys_id is allocated
* to the created element.
*/
break;
case PEC_COMP: /* NO-OP */
break;
default:
/* Any other element class is reported as a failure. */
return (PO_FAIL);
}
/* Reached only when no step above returned PO_FAIL. */
return (PO_SUCCESS);
}
/*
* commit_delete() is used to delete a configuration element upon the
* system. Since only pools and resources actually need to perform
* any action, other elements are ignored as a no-op.
*/
static int
{
int ret = 0;
/*
* Elements for which elem_is_tmp() is true are left alone and
* reported as success.
*/
if (elem_is_tmp(pe))
return (PO_SUCCESS);
switch (pool_elem_class(pe)) {
case PEC_SYSTEM: /* NO-OP */
break;
case PEC_POOL:
break;
case PEC_RES_COMP:
case PEC_RES_AGG:
break;
case PEC_COMP: /* NO-OP */
break;
default:
/* Any other element class is reported as a failure. */
return (PO_FAIL);
}
/* ret starts at 0 and is the result for every recognised class. */
return (ret);
}
/*
* commit_update() is used to update a configuration element upon the
* system or in a static configuration file. The pass parameter
* governs whether properties are being updated or associations. In
* pass 0, properties are updated. If the element is of class
* PEC_COMP, then make sure that the element in the static
* configuration file is correctly located before proceeding with the
* update. Then, the element in the dynamic configuration file is
* updated. In pass 1, ie. pass != 0, any pool components have their
* associations updated in the dynamic configuration.
*/
static int
{
/* Pass 0: property updates. */
if (pass == 0) {
pool_elem_comp(e1));
pool_elem_comp(e2));
char *name;
name);
#ifdef DEBUG
dprintf("transferring: res, comp\n");
#endif /* DEBUG */
}
}
clean_element) != PO_SUCCESS) {
return (PO_FAIL);
}
/*
* Need to do some ordering of property updates if the
* element to be updated is a resource. The result of a
* comparison decides the order; in the else case the max
* is updated first.
*/
PO_SUCCESS ||
PO_SUCCESS ||
return (PO_FAIL);
&val) != PO_SUCCESS)
return (PO_FAIL);
} else {
&val) != PO_SUCCESS)
return (PO_FAIL);
}
}
/*
* This next couple of steps needs some
* explanation. The first walk copies all the
* properties that are writeable from the static
* configuration to the dynamic configuration. The
* second walk copies all properties (writeable or
* not) from the dynamic configuration element back to
* the static configuration element. This ensures that
* updates from the static configuration element are
* correctly applied to the dynamic configuration and
* then the static configuration element is updated
* with the latest values of the read-only properties
* from the dynamic configuration element. The
* enforcing of permissions is performed in
* clone_element by its choice of property
* manipulation function.
*/
PO_SUCCESS) {
return (PO_FAIL);
}
PO_SUCCESS) {
return (PO_FAIL);
}
} else {
/*
* Non-zero pass: per the function header this is where
* associations are updated. Each of the nelem entries in rs
* is handled in turn.
*/
int i;
return (PO_FAIL);
for (i = 0; i < nelem; i++) {
char *res_name =
if ((tgt_res = pool_get_resource(
NULL) {
rs[i]);
}
PO_SUCCESS) {
return (PO_FAIL);
}
}
}
}
}
return (PO_SUCCESS);
}
/*
* diff_and_fix() works out the differences between two configurations
* and modifies the state of the system to match the operations
* required to bring the two configurations into sync.
*
* Returns PO_SUCCESS/PO_FAIL.
*/
static int
{
/*
* The ordering of the operations is significant, we must
* process the system element, then the pools elements, then
* the resource elements, then the pools elements again and
* finally the resource components.
*
* TODO
* PEC_RES_COMP are the only type of resources
* currently. When PEC_RES_AGG resources are added they must
* also be processed.
*/
/* Each processing step below fails the whole operation on error. */
return (PO_FAIL);
}
return (PO_FAIL);
}
return (PO_FAIL);
}
return (PO_FAIL);
}
return (PO_FAIL);
}
/*
* Share the resources. It has to be called for both
* configurations to ensure that the configurations still look
* the same.
*/
return (PO_FAIL);
}
return (PO_FAIL);
}
/* Reached only when no step above returned PO_FAIL. */
return (PO_SUCCESS);
}
static int
{
return (PO_FAIL);
}
} else if (! elem_is_default(pe)) {
return (PO_FAIL);
}
}
return (PO_SUCCESS);
}
static int
{
const char *resname;
const char *restype;
/*
* I have to find the right parent in the static
* configuration. It may not exist, in which case it's
* correct to put it in the default
*/
pool_elem_comp(pe));
return (PO_FAIL);
return (PO_FAIL);
return (PO_FAIL);
if (parent_res == NULL)
/*
* Now need to make a copy of the component in the
* dynamic configuration in the static configuration.
*/
return (PO_FAIL);
clone_element) != PO_SUCCESS)
return (PO_FAIL);
} else if (elem_is_default(pe)) {
char *name;
return (PO_FAIL);
switch (pool_elem_class(pe)) {
case PEC_POOL:
return (PO_FAIL);
}
return (PO_FAIL);
break;
case PEC_RES_AGG:
case PEC_RES_COMP:
NULL) {
return (PO_FAIL);
}
return (PO_FAIL);
break;
default:
break;
}
} else {
return (PO_FAIL);
}
return (PO_SUCCESS);
}
/*
* This function compares the elements of the supplied type in the
* static and dynamic configurations supplied. The lists of elements
* are compared and used to create, delete and update elements in
* both the static and dynamic configurations. The pass parameter is
* used to indicate to commit_update() whether property updates or
* association updates should be performed.
*/
static int
{
/* i indexes stc_elems (static list), j indexes dyn_elems (dynamic list). */
int i, j;
return (PO_FAIL);
return (PO_FAIL);
}
/*
* Step through and do the updating, remember that we are
* comparing using the compare function for the configuration
* and that is fixed.
*/
i = j = 0;
int compare;
/*
* We are going to do this by stepping through the static
* list first.
*/
/* Two default elements are always treated as a match. */
if (elem_is_default(stc_elems[i]) &&
elem_is_default(dyn_elems[j]))
compare = 0;
else
dyn_elems[j]);
/*
* compare < 0 advances only the static index, compare > 0
* advances only the dynamic index, and a match advances both.
*/
if (compare < 0) {
i++;
} else if (compare > 0) {
j++;
} else { /* compare == 0 */
!= PO_SUCCESS) {
}
i++;
j++;
}
}
return (PO_FAIL);
}
i++;
}
return (PO_FAIL);
}
j++;
}
return (status);
}
/*
* get_elem_list() returns a list of pool_elem_t's. The size of the
* list is written into nelem. The list contains elements of all types
* that pools is interested in: i.e. system, pool, resources and
* resource components. It is the caller's responsibility to free the
* list when it is finished with.
*
* The array of pointers returned by the type specific query can be
* safely cast to be an array of pool_elem_t pointers. In the case of
* PEC_RES_COMP some additional processing is required to qualify the
* list of elements.
*
* Returns a pointer to a list of pool_elem_t's or NULL on failure.
*/
static pool_elem_t **
{
int i;
/* Build the list according to the requested element type. */
switch (type) {
case PEC_SYSTEM:
return (NULL);
/* The system list always holds exactly one element. */
*nelem = 1;
break;
case PEC_POOL:
}
break;
case PEC_RES_COMP:
int j = 0;
for (i = 0; i < *nelem; i++) {
}
/* The reported length is replaced by j once the loop is done. */
*nelem = j;
}
break;
case PEC_COMP:
}
break;
default:
/* Any other type is not handled here: terminate via abort(). */
abort();
break;
}
return (elems);
}
/*
* share_resources() sets up the allocation of resources by each
* provider. Firstly all resources are updated with the importance of
* each pool, then each resource provider is invoked in turn with a
* list of its own resources. Finally, the pool importance details
* are removed from the resources.
*
* Returns PO_SUCCESS/PO_FAIL
*/
static int
{
/*
* Call an allocation function for each type of supported resource.
* This function is responsible for "sharing" resources to resource
* sets as determined by the system.allocate-method.
*/
return (PO_FAIL);
/*
* On the failure paths below the importance properties are
* removed before returning; the result of that removal is
* deliberately ignored.
*/
(void) remove_importance_props(conf);
return (PO_FAIL);
}
/*
* 'pool.importance' defines the importance of a pool;
* resources inherit the importance of the pool that
* is associated with them. If more than one pool is
* associated with a resource, the importance of the
* resource is the maximum importance of all
* associated pools. Use '_importance' on resources
* to determine who gets extra.
*/
(void) remove_importance_props(conf);
return (PO_FAIL);
}
}
/* Success path: the importance properties are removed here as well. */
(void) remove_importance_props(conf);
return (PO_SUCCESS);
}
/*
* Work out which allocation method to use based on the value of the
* system.allocate-method property.
*/
int
{
const char *method_name;
int ret;
else {
}
return (PO_FAIL);
} else {
}
} else {
}
/*
* Dispatch first on the allocation method and then, within the
* importance method, on the resource class derived from the type
* string.
*/
switch (method) {
case POA_IMPORTANCE_NUM:
/*
* TODO: Add support for new resource types
*/
switch (pool_resource_elem_class_from_string(type)) {
case PREC_PSET:
break;
default:
break;
}
break;
break;
}
return (ret);
}
/*
* Each set will get its minimum, however if there is more than the
* total minimum available, then leave this in the default set.
*/
int
{
uint_t j;
/* With a single resource set there is nothing to move around. */
if (nelem == 1)
return (PO_SUCCESS);
return (PO_FAIL);
}
/* Load current resource values. */
for (j = 0; j < nelem; j++) {
/*
* default_res is only assigned while it is still NULL, so at
* most one entry of res is recorded as the default.
*/
if (default_res == NULL &&
default_res = res[j];
/* Failing to read max, min or size fails the allocation. */
if (resource_get_max(res[j],
resource_get_min(res[j],
resource_get_size(res[j],
return (PO_FAIL);
}
}
/*
* Firstly, for all resources that have size greater than min,
* transfer all movable size above min to the default resource.
*/
for (j = 0; j < nelem; j++) {
/* compute the real minimum number of resources */
PO_SUCCESS) {
return (PO_FAIL);
}
}
}
/*
* Now, transfer resources below min from the default.
*/
for (j = 0; j < nelem; j++) {
/*
* We don't want to interfere with resources which are reserved
*/
PO_SUCCESS) {
return (PO_FAIL);
}
}
}
return (PO_SUCCESS);
}
/*
* Allocate cpus to pset resource sets, favoring sets with higher importance.
*
* Step 1: Sort resource sets by decreasing importance, and load each set's
* current size (oldsize), min, max, and number of pinned cpus.
* Compute the total number of cpus by totaling oldsize.
*
* Step 2: Compute the newsize for each set:
*
* Give each set its min number of cpus. This min may be greater than
* its pset.min due to pinned cpus. If there are more cpus than the total
* of all mins, then the surplus cpus are dealt round-robin to all sets
* (up to their max) in order of decreasing importance. A set may be
* skipped during dealing because it started with more than its min due to
* pinned cpus. The dealing stops when there are no more cpus or all
* sets are at their max. If all sets are at their max, any remaining cpus
* are given to the default set.
*
* Step 3: Transfer cpus from sets with (oldsize > newsize) to sets with
* (oldsize < newsize).
*/
int
{
/* their max */
/* size == newsize */
int deal;
int j;
/*
* Build list of res_info_t's
*/
return (PO_FAIL);
}
/* Order resources by importance, most important being first */
for (j = 0; j < nelem; j++) {
/* Track which resource is the default */
if (default_res == NULL &&
default_res = res[j];
default_res_info = &(res_info[j]);
}
/* Load sets' current values */
PO_FAIL ||
return (PO_FAIL);
}
/* Start each set's newsize out at their min. */
/* pre-deal pinned resources that exceed min */
}
/* Compute total number of resources to deal out */
#ifdef DEBUG
dprintf("res allocation details\n");
#endif /* DEBUG */
}
/*
* Deal one resource to each set, and then another, until all
* resources are dealt or all sets are at their max.
*/
for (j = 0; j < nelem; j++) {
/*
* Skip this resource set if it has already been
* pre-dealt a resource due to pinned resources.
*/
continue;
/*
* Dealing one resource grows this set's newsize by one and
* shrinks the number left to deal by one.
*/
res_info[j].ri_newsize++;
if (res_info[j].ri_newsize ==
sets_maxed++;
num_to_deal--;
/* Nothing left to deal: leave the loop. */
if (num_to_deal == 0)
break;
}
}
}
/*
* If all resource sets are at their max, deal the remaining to the
* default resource set.
*/
}
/*
* Sort so that resource sets needing resources precede resource sets
* that have extra resources. The sort function will also compute
* the quantity of resources that need to be transferred into or out
* of each set so that its size == newsize.
*/
/*
* The donor index starts at the end of the resource set list and
* walks up. The receiver index starts at the beginning of the
* resource set list and walks down. CPUs are transferred from the
* donors to the receivers until all sets have transfer == 0.
*/
receiver = 0;
/* Number of sets with transfer == 0 */
sets_finished = 0;
/* Transfer resources so that each set's size becomes newsize */
for (;;) {
return (PO_FAIL);
}
break;
}
donor--;
continue;
}
receiver++;
continue;
}
/* Transfer resources from the donor set to the receiver */
ntrans) != PO_SUCCESS) {
return (PO_FAIL);
}
}
if (sets_finished != nelem)
return (ret);
}
/*
* Used as a qsort parameter to help order resources in terms of their
* importance, higher importance being first.
*/
int
{
}
/*
* Sort in increasing order so that resource sets with extra resources are at
* the end and resource sets needing resources are at the beginning.
*/
int
{
}
/*
* set_importance_cb() is used to create "_importance" props on each
* resource associated with a pool.
*
* Returns PO_SUCCESS/PO_FAIL
*/
/*ARGSUSED*/
static int
{
POC_INT) {
return (PO_FAIL);
}
NULL) {
return (PO_FAIL);
}
/*
* The stored value is only rewritten when the existing importance
* does not exceed the new one.
*/
if (old_importance <= importance) {
&val);
}
}
return (PO_SUCCESS);
}
/*
* unset_importance_cb() is used to remove "_importance" props from
* each resource associated with a pool.
*
* Returns PO_SUCCESS/PO_FAIL
*/
/*ARGSUSED*/
static int
{
NULL) {
return (PO_FAIL);
}
PO_FAIL) {
return (PO_FAIL);
}
}
return (PO_SUCCESS);
}
/*
* add_importance_props() is used to create "_importance" props on
* each resource associated with a pool.
*
* Returns PO_SUCCESS/PO_FAIL
*/
static int
{
}
/*
* remove_importance_props() is used to remove "_importance" props on
* each resource associated with a pool.
*
* Returns PO_SUCCESS/PO_FAIL
*/
static int
{
}
/*
* pool_conf_commit_sys() takes a configuration and modifies both the
* supplied configuration and the dynamic configuration. The goal of
* this modification is to generate a dynamic configuration which best
* represents the constraints laid down in the static configuration
* and to update the static configuration with the results of this
* process.
*
* Returns PO_SUCCESS/PO_FAIL
*/
int
{
return (PO_FAIL);
PO_SUCCESS) {
return (PO_FAIL);
}
/*
* From here on dyn is closed before every return, on failure and
* on success alike; the result of the close is ignored.
*/
(void) pool_conf_close(dyn);
return (PO_FAIL);
}
}
/*
* Now try to make the two things "the same".
*/
(void) pool_conf_close(dyn);
return (PO_FAIL);
}
(void) pool_conf_close(dyn);
return (PO_FAIL);
}
(void) pool_conf_close(dyn);
return (PO_SUCCESS);
}
/*
* Copies all properties from one element to another. If the property
* is a readonly property, then don't copy it.
*/
/* ARGSUSED */
static int
{
#ifdef DEBUG
dprintf("Cloning %s from %s\n",
#endif /* DEBUG */
/*
* Some properties should be ignored
*/
return (PO_SUCCESS);
/* The temporary property needs special handling */
else
}
/*
* Removes all properties from one element. Properties which are
* managed by the configuration are ignored.
*/
/* ARGSUSED3 */
static int
{
/*
* Some properties should be ignored
*/
return (PO_SUCCESS);
}