ipmi_plugin.c revision 7a0b67e3ef0ce92ca436e68c45383a76e14311a0
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* IPMI Plugin for the disk hotplug & fault monitor
*/
#include <sys/byteorder.h>
#include <inttypes.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <stddef.h>
#include <stropts.h>
#include <stdlib.h>
#include <errno.h>
#include <pthread.h>
#include <ctype.h>
#include <limits.h>
#include <utility.h>
#include <libnvpair.h>
#include <sys/bmc_intf.h>
#include <libuutil.h>
#include "dm_plugin.h"
#include "util.h"
#include "ipmi_plugin.h"
#define MESSAGE_BUFSIZE 1024
#define STRUCT_MIN_SIZE(t, o) offsetof(t, o)
/* For the purposes of disk capacity, a <X>B is 1000x, not 1024x */
#define ONE_KILOBYTE 1000.0
/* IPMI Command Code definitions */
#define IPMI_NETFN_OEM 0x2E
#define IPMI_CMD_GET_UPTIME 0x08
#define IPMI_CMD_FRU_UPDATE 0x16
#define IPMI_CMD_GET_SENSOR_READING 0x2d
#define IPMI_CMD_SET_SENSOR_READING 0x30
#define IPMI_CMD_ADD_SEL_ENTRY 0x44
/* IPMI Request types supported by this plugin: */
#pragma pack(1)
struct ipmi_sensor_control {
#define SC_ASSERT_OP 0x20
#define SC_DEASSERT_OP 0x08
/*
* The following two fields are stored and sent to the bmc in
* little-endian form
*/
};
/*
* Virtual sensor format for FRU data (Sun OEM)
*/
struct ipmi_fru_update {
/*
* Every member is a fixed-size char array (88 bytes in total), and
* sizeof (struct ipmi_fru_update) is what the setup code reports as
* the command's data length.
* NOTE(review): the per-field meanings below are inferred from the
* member names; whether each field is NUL-terminated or padded is not
* visible here -- confirm against the code that fills the structure.
*/
char d_manuf[16];	/* presumably the manufacturer string */
char d_model[28];	/* presumably the model string */
char d_serial[20];	/* presumably the serial number */
char d_firmware[8];	/* presumably the firmware revision */
char d_capacity[16];	/* textual disk capacity */
};
struct ipmi_sel_entry {
#define SEL_TYPE_OEM 0xC0
};
struct ipmi_sensor_reading {
data_unavailable : 1,
scanning_enabled : 1,
};
/*
* The following structure's members are returned in BIG-ENDIAN form.
*/
struct bmc_uptime_info {
};
#pragma pack()
/* End of request types supported */
struct ipmi_cmd_setup {
const char *name;
};
typedef struct ipmi_action_handle {
void *databp;
int datablen;
typedef enum {
typedef struct bmc_cache_ent {
union {
struct ipmi_fru_update fru_Info;
/*
* The deasserted field is not used
* to cache data in the sensor_control
* structure (we cache asserted states):
*/
struct ipmi_sensor_control sensor_Ctl;
} u;
#define sensorCtl u.sensor_Ctl
typedef struct bmc_replay_list_ent {
int datablen;
/*
* The ipmi_mutex protects the bmc state cache and serializes bmc device access
*/
/* IPMI Command Buffer-Setup Functions: */
/* BMC Monitor and BMC Cache functions: */
static int bmc_cache_init(void);
static void bmc_cache_fini(void);
static int bmc_state_refresh_from_cache(void);
static void bmc_monitor_thread(void *arg);
/* BMC Replay List functions: */
static int bmc_replay_list_init(void);
static void bmc_replay_list_fini(void);
static int bmc_replay_list_execute(void);
/* IPMI commands used internally */
/* plugin entry points: */
static dm_plugin_error_t ipmi_plugin_init(void);
static dm_plugin_error_t ipmi_plugin_fini(void);
};
static struct ipmi_cmd_setup ipmi_cmd_tab[] = {
0, IPMI_CMD_FRU_UPDATE },
0, IPMI_CMD_ADD_SEL_ENTRY },
};
static pthread_t g_bmcmon_tid;
static boolean_t g_bmc_monitor_active;
static boolean_t g_bmcmon_done;
static int g_BMCErrorInjectionRate = 0;
static int g_bmc_fd = -1;
/*
* The textual strings that are used in the actions may be one of the
* following forms:
*
* [1] `fru gid=<n> hdd=<m>'
* [2] `sensor id=<x> assert=<y> deassert=<z>'
*
* The generic parser will take a string and spit out the first token
* (e.g. `fru' or `sensor') and an nvlist that contains the key-value
* pairs in the rest of the string. The assumption is that there are
* no embedded spaces or tabs in the keys or values.
*/
static boolean_t
{
int digits = 0;
str += 2;
str++;
}
while (*str != 0) {
return (B_FALSE);
}
str++;
digits++;
}
}
/*
 * Convert the NUL-terminated string `str' to lowercase, in place.
 *
 * Callers use this to normalize parsed tokens before storing them as
 * nvlist names, because nvlist lookups are case-sensitive.  An empty
 * string is left untouched.
 */
static void
tolowerString(char *str)
{
	for (; *str != 0; str++) {
		/*
		 * The cast to unsigned char is required: passing a
		 * negative char value to tolower() is undefined.
		 */
		*str = (char)tolower((unsigned char)*str);
	}
}
static boolean_t
{
char *action;
int actionlen;
return (B_FALSE);
/* Look for a name=val construct */
*eq = 0;
eq++;
/*
* Convert token to lowercase to preserve
* case-insensitivity, because nvlist doesn't
* do case-insensitive lookups
*/
/* Integer property */
} else {
/* String property */
eq) != 0)
}
/* Boolean property */
} else /* Parse error (`X=' is invalid) */
}
} else
if (!rv) {
if (*cmdp) {
}
}
return (rv);
}
static ipmi_action_handle_t *
int datablen)
{
return (ret);
}
static void
bmc_reopen(void)
{
if (g_bmc_fd >= 0)
log_warn_e("Could not reopen bmc device");
}
}
static void
{
if (hdlp) {
}
}
static boolean_t
{
int i;
return (B_TRUE);
}
return (B_FALSE);
}
static dm_plugin_error_t
{
(void) bmc_state_refresh(NULL);
if (g_need_exec_replay)
g_need_exec_replay = (bmc_replay_list_execute() != 0);
if (!g_need_exec_replay) {
? DMPE_SUCCESS : DMPE_FAILURE;
}
/*
* If the command failed (or we couldn't execute the command because
* we couldn't execute the replay list), and the failure is due to a
* timeout error, save the command's result for later replay
*/
/*
* Fake the return value as success (since we queued the
* command for later execution).
*/
rv = DMPE_SUCCESS;
/* Apply the command to the bmc state cache */
}
return (rv);
}
static dm_plugin_error_t
{
/*
* Make sure the handle's command is one
* of those defined in the ipmi_cmd_tab:
*/
"not match any known commands.\n", (void *)hdlp);
return (DMPE_FAILURE);
}
return (rv);
}
static dm_plugin_error_t
{
char *cmd;
int found_index;
int datablen, i;
void *databp;
for (found_index = -1, i = 0;
found_index = i;
}
}
}
}
return (rv);
}
static dm_plugin_error_t
void *arg)
{
struct ipmi_fru_update *fup;
char *buf;
/* We need 2 properties: `gid' and `hdd': */
return (DMPE_FAILURE);
}
*datablen = sizeof (struct ipmi_fru_update);
sizeof (fup->d_capacity);
/*
* Print the size of the disk to a temporary buffer whose size is
* 1 more than the size of the buffer in the ipmi request data
* structure, so we can get the full 8 characters (instead of 7 + NUL)
*/
"%.1f%s",
"KB"))));
return (DMPE_SUCCESS);
}
/*ARGSUSED*/
static dm_plugin_error_t
void *arg)
{
struct ipmi_sensor_control *scp;
/* We need at least 2 properties: `sid' and (`amask' || `dmask'): */
(!am_present && !dam_present)) {
return (DMPE_FAILURE);
}
sid);
return (DMPE_FAILURE);
} else if (assertmask > UINT16_MAX) {
"'.\n", assertmask);
return (DMPE_FAILURE);
} else if (deassertmask > UINT16_MAX) {
/*
* Range-check the deassertion mask itself.  Testing assertmask a
* second time here would let an out-of-range deassertion mask
* through unreported.
*/
"'.\n", deassertmask);
return (DMPE_FAILURE);
}
(dam_present ? SC_DEASSERT_OP : 0);
*datablen = sizeof (struct ipmi_sensor_control);
return (DMPE_SUCCESS);
}
/*ARGSUSED*/
static dm_plugin_error_t
void *arg)
{
struct ipmi_sel_entry *sep;
/* We need 2 properties: `oem' and `manu': */
return (DMPE_FAILURE);
}
if ((manuf_id & ~0xFFFFFFULL) != 0) {
manuf_id);
return (DMPE_FAILURE);
} else if ((oem_data & ~0xFFFFFFFFFFFFULL) != 0) {
"'.\n", oem_data);
return (DMPE_FAILURE);
}
*datablen = sizeof (struct ipmi_sel_entry);
return (DMPE_SUCCESS);
}
static dm_plugin_error_t
{
struct ipmi_sensor_reading *srp;
/* The command must return precisely the size of the data we expect */
rv = DMPE_FAILURE;
if (rv == DMPE_SUCCESS &&
if (assrtd) {
if (updated_flag)
*updated_flag = B_TRUE;
}
}
return (rv);
}
static dm_plugin_error_t
{
struct bmc_uptime_info *utinfop;
1, &rsp);
/* The command must return precisely the size of the data we expect */
rv = DMPE_FAILURE;
if (rv == DMPE_SUCCESS) {
if (uptime)
if (bootgen)
}
return (rv);
}
/* ****** B M C R E P L A Y L I S T I M P L E M E N T A T I O N ****** */
/*
* The reasoning behind the replay list is to try to ensure that commands are
* reliably sent to the BMC. In the case of the replay list, any commands that
* fail because they timed out are added to the replay list. Then, the next
* time a command is attempted, the replay list is sent to the BMC first, then
* the new command (to preserve ordering). Currently, the only commands that
* are supported by this plugin are write-oriented commands, where information
* is sent to the BMC. If, in the future, read-oriented commands are desired,
* the replay mechanism will need to be enhanced to force all pending commands
* in the replay list out to the BMC before executing the read-oriented
* command (similar to a write cache that's flushed when a read is requested).
*/
static void
{
if (p->databp)
dfree(p, sizeof (bmc_replay_list_ent_t));
}
static int
bmc_replay_list_init(void)
{
if ((g_uu_pool_replay = uu_list_pool_create(
"bmc_replay_list_pool", sizeof (bmc_replay_list_ent_t),
return (DMPE_FAILURE);
== NULL) {
return (DMPE_FAILURE);
}
return (DMPE_SUCCESS);
}
static void
bmc_replay_list_fini(void)
{
}
}
/*
* The caller must hold the ipmi_mutex!
*/
static void
int datablen)
{
/*
* Make a deep copy of the data buffer, since we can't assume
* anything about when it will be deallocated.
*/
if (datablen > 0) {
}
/* The replay list is a queue, so add to its tail: */
}
/*
* The caller must hold the ipmi_mutex!
*
* Returns < 0 if the replay list should be executed at a later time
* (due to transient errors)
*/
static int
bmc_replay_list_execute(void)
{
bmc_replay_list_ent_t *p = NULL;
return (-1);
/*
* On the first timeout error, abort the replay; We cannot execute
* commands later in the list because they may depend on the state
* set by earlier commands. We'll retry the command that failed
* later. (Note that non-timeout-related failures do not cause
* aborts because the assumption is that the original command caller
* would not behave differently if a command were to fail.) If this
* assumption does not remain valid in the future, an enhancement to
* the plugin API would be required to introduce a synchronous flag
* that would result in the blocking of the calling thread until
* BOTH the replay list is fully executed AND the user's current
* command is executed (at which point the status can be examined
* by the caller).
*/
if (rv == DMPE_SUCCESS ||
(rv == DMPE_FAILURE &&
}
if (rv == DMPE_SUCCESS) {
/* Add the command to the bmc state cache */
}
} else if (rv == DMPE_FAILURE &&
}
}
return (timedout_err ? -1 : 0);
}
/* ************** B M C C A C H E I M P L E M E N T A T I O N ************* */
/*
* The reasoning behind the cache is to maintain a mirror of the BMC's state
* as it pertains to the commands that were sent from the plugin. For Sun's
* BMC implementations, the sensor and FRU information is not currently
* preserved when the BMC (or service processor) is reset (or rebooted). To
* maintain consistency from the user/administrator's perspective, once the
* BMC comes back online after a reset, the information from the state cache
* is sent, all at once, in a particular order, to the BMC.
*/
static int
bmc_cache_init(void)
{
if ((g_uu_pool_cache = uu_list_pool_create(
"bmc_cache_entry_pool", sizeof (bmc_cache_ent_t),
return (DMPE_FAILURE);
== NULL) {
return (DMPE_FAILURE);
}
return (DMPE_SUCCESS);
}
static void
bmc_cache_fini(void)
{
void *p;
dfree(p, sizeof (bmc_cache_ent_t));
}
static void
{
struct ipmi_sensor_control *tgt;
/*
* operation is initted here so that when we do the bmc update from
* the cache, the structure will be ready to send directly from the
* cache
*/
/*
* If the command fails, we'll still have the asserted
* states that were set by the command that just finished
*/
&was_assrtd_updated) == DMPE_SUCCESS &&
was_assrtd_updated == B_TRUE) {
/*
* If the states that were just asserted are no longer asserted
* when we check, issue a warning, but only if the verbosity is
* jacked up -- this could be OK (if another user updates the
* sensor's state between the time we executed the update sensor
* command and the time we check the sensor's value).
*/
!= p->sensorCtl.assert_states) {
"Asserted state(s) set before cache addition "
"(0x%x) didn't stick -- caching them anyway\n",
p->sensorCtl.assert_states);
}
}
}
static void
{
/*
* It's not possible for the same bits to be set in the assert and
* deassert masks -- it would have caused an IPMI error when the
* command was originally executed (and the cache update would
* therefore not have occurred)
*/
}
static boolean_t
{
}
static void
int *buflenp)
{
/* Mask off bits that shouldn't be set according to the spec */
p->sensorCtl.deassert_states =
*buflenp = sizeof (struct ipmi_sensor_control);
}
static void
{
}
static void
{
}
static boolean_t
{
}
static void
int *buflenp)
{
*buflenp = sizeof (struct ipmi_fru_update);
}
/*
* Different elements in the cache need to be restored in order
* (e.g. sensor state information must be populated before FRU information
* is populated because the FRU information won't "stick" if the right
* state isn't asserted)
* The g_restoreOrder array is indexed by cache entry type
*/
static const bmc_cache_ent_type_t g_restoreOrder[] = {
};
static struct bmc_cache_member {
int dataszmin;
void (*updatefn)(bmc_cache_ent_t *, void *);
void (*initfn)(bmc_cache_ent_t *, void *);
void (*bufsetupfn)(bmc_cache_ent_t *,
void **, int *);
void (*bufdonefn)(bmc_cache_ent_t *, void *,
int);
} g_cachemembers[] = {
/* CACHE_ENT_FIRST */
{ 0, 0, 0, 0, NULL },
/* CACHE_ENT_FRUINFO */
sizeof (struct ipmi_fru_update),
NULL },
/* CACHE_ENT_SENSORCTL */
sizeof (struct ipmi_sensor_control),
NULL },
/* CACHE_ENT_LAST */
{ 0, 0, 0, 0, NULL }
};
static bmc_cache_ent_t *
{
bmc_cache_ent_t *p = NULL;
return (NULL);
}
}
}
static void
int datablen)
{
int i;
bmc_cache_ent_t *p;
i++) {
p->type = i;
}
}
if (found_initfn) {
uu_list_insert(g_uu_cachelist, p, 0);
} else {
dfree(p, sizeof (bmc_cache_ent_t));
}
}
/*
* The caller must hold the ipmi_mutex!
*/
static void
{
bmc_cache_ent_t *p;
/*
* Do a lookup to see if we have an entry for this entity.
* If so, update it, otherwise, create a new entry in the cache.
*/
!= NULL) {
/* Update the cache with the command payload */
} else {
/* Add the item to the cache */
}
}
/*
* Caller MUST hold the ipmi_mutex
*/
static int
{
int i;
bmc_cache_ent_t *p = NULL;
void *databp;
int datablen;
/*
* Since cached state needs to be restored in a particular
* order, make several passes through the cache list, restoring
* the state in pass-order. If this becomes performance-limiting,
* the cache list can be populated in sorted order (in pass order)
*/
return (-1);
if (p->type == g_restoreOrder[i]) {
if (rv == DMPE_FAILURE &&
}
}
}
return (bail ? -1 : 0);
}
/*
* Caller MUST hold the ipmi_mutex
*/
static int
{
static uint32_t last_utime = 0;
if (!g_bmc_monitor_active)
return (0);
if (refreshed)
if (rv == DMPE_SUCCESS) {
/*
* This also handles the wrap-around case (when utime is
* less than last_utime, but iter == last_iter), and
* also the case when the BMC's configuration is
* reset after a reboot (e.g. the reboot iteration #
* is reset to 0).
*/
if (initted &&
if (bmc_state_refresh_from_cache() < 0) {
return (-1);
} else {
if (refreshed)
}
}
last_utime = utime;
}
return (0);
}
/*ARGSUSED*/
static void
bmc_monitor_thread(void *arg)
{
while (!g_bmcmon_done) {
/*
* If the state was successfully refreshed, and there's a
* replay list, execute that list.
*/
if (g_need_exec_replay) {
(bmc_replay_list_execute() != 0);
}
"cached state!\n");
}
/* Poll the BMC for any changes in its state every minute */
(void) pthread_cond_timedwait(&ipmi_cond,
&ipmi_mutex, &tspec);
}
}
/* ***************** P L U G I N E N T R Y P O I N T S ******************* */
static dm_plugin_error_t
ipmi_plugin_init(void)
{
int method;
const char *monpropval =
const char *errinjprop =
log_warn_e("Could not open bmc device");
return (DMPE_FAILURE);
}
log_warn("IPMI plugin: Could not determine bmc messaging "
"interface!\n");
return (DMPE_FAILURE);
}
/*
* Keep the bmc device open to prevent the driver from unloading
* at a critical moment (e.g. when the BMC is not available). If
* we didn't do this, subsequent attempts at opening the bmc device
* would fail because the bmc driver would not be able to find
* the BMC (if it's resetting), and once the bmc's probe fails,
* the system will not reload it automatically.
*/
if (bmc_replay_list_init() != 0) {
return (DMPE_FAILURE);
}
if (errinjprop != NULL)
/*
* Check to see if the BMC supports the Sun OEM uptime command
* If it does, spawn a monitoring thread that will periodically poll
* the bmc and check for bmc resets (since the bmc does not retain
* the state across resets)
*/
if (bmc_cache_init() != 0) {
return (DMPE_FAILURE);
}
} else
return (DMPE_SUCCESS);
}
static dm_plugin_error_t
{
}
static dm_plugin_error_t
ipmi_plugin_bind_handle(const char *actionString,
{
(ipmi_action_handle_t **)hdlp));
}
static dm_plugin_error_t
{
}
static dm_plugin_error_t
{
return (DMPE_SUCCESS);
}
static dm_plugin_error_t
ipmi_plugin_fini(void)
{
if (g_bmc_monitor_active) {
(void) pthread_cond_broadcast(&ipmi_cond);
/* Signal the thread just in case it's blocked doing BMC I/O */
/* Clean up cache lists */
}
return (DMPE_SUCCESS);
}
/* ************** I P M I S U P P O R T F U N C T I O N S **************** */
static dm_plugin_error_t
{
static int inject_rep = 0;
if (g_BMCErrorInjectionRate > 0 &&
(++inject_rep % g_BMCErrorInjectionRate) == 0) {
inject_rep = 0;
return (DMPE_FAILURE);
}
if (g_bmc_fd < 0)
bmc_reopen();
/* sendrecv_fn cannot be NULL at this point */
return (rv);
}
static dm_plugin_error_t
{
struct bmc_reqrsp reqrsp;
if (datalen > SEND_MAX_PAYLOAD_SIZE) {
log_warn("IPMI Plugin: Data payload length (%d) is too "
"large; it cannot be processed by this version of "
"the bmc driver.\n", datalen);
return (DMPE_FAILURE);
}
log_warn_e("IPMI Plugin: ioctl failure");
return (DMPE_FAILURE);
}
/* Decrement for sizeof lun, cmd and ccode */
return (DMPE_SUCCESS);
}
static dm_plugin_error_t
{
int flags = 0;
/*
* The length of the message structure is equal to the size of the
* bmc_req_t structure, PLUS any additional data space in excess of
* the data space already reserved in the data member + <n> for
* the rest of the members in the bmc_msg_t structure.
*/
((datalen > SEND_MAX_PAYLOAD_SIZE) ?
(datalen - SEND_MAX_PAYLOAD_SIZE) : 0);
log_warn_e("IPMI Plugin: putmsg failure");
/*
* As a workaround for a bug in bmc, if an error was returned
* from putmsg, we need to close the fd and reopen it to clear
* the error state.
*/
bmc_reopen();
return (DMPE_FAILURE);
}
log_warn_e("IPMI Plugin: getmsg failure");
return (DMPE_FAILURE);
}
/*LINTED*/
/* Did we get an error back from the stream? */
case BMC_MSG_RESPONSE:
break;
case BMC_MSG_ERROR:
/* In case of an error, msg->msg[0] has the error code */
log_warn("IPMI Plugin: bmc_send_cmd error: %s\n",
break;
}
return (DMPE_SUCCESS);
}
/*
* Determine which interface to use. Returns the interface method
* to use.
*/
static int
{
int retval = 0;
/*
* If the ioctl doesn't exist, we should get an EINVAL back.
* Bail out on any other error.
*/
retval = -1;
else
}
if (retval == 0)
return (retval);
}
static void
{
int i;
if (request->datalength > 0) {
for (i = 0; i < request->datalength; i++)
} else {
}
}
static void
{
int i;
if (response->datalength > 0) {
for (i = 0; i < response->datalength; i++)
} else {
}
}