/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file contains the environmental PICL plug-in module.
*/
/*
* This plugin sets up the PICLTREE for Chicago WS, covering
* temperature sensors and fan speeds.
*
* The environmental policy defaults to the auto mode
* as programmed by OBP at boot time.
*/
#include <stdio.h>
#include <stdlib.h>
#include <sys/sysmacros.h>
#include <limits.h>
#include <string.h>
#include <strings.h>
#include <stdarg.h>
#include <alloca.h>
#include <unistd.h>
#include <sys/processor.h>
#include <syslog.h>
#include <errno.h>
#include <fcntl.h>
#include <picl.h>
#include <picltree.h>
#include <picldefs.h>
#include <pthread.h>
#include <signal.h>
#include <libdevinfo.h>
#include <sys/systeminfo.h>
#include <note.h>
#include <sys/pic16f747.h>
#include "envd.h"
int debug_fd;
/*
* PICL plugin entry points
*/
static void piclenvd_register(void);
static void piclenvd_init(void);
static void piclenvd_fini(void);
/*
* Env setup routines
*/
extern void env_picl_setup(void);
extern void env_picl_destroy(void);
extern int env_picl_setup_tuneables(void);
/*
* PSU fan fault handling
*/
static boolean_t has_psufan_failed(void);
#pragma init(piclenvd_register)
/*
* Plugin registration information
*/
"SUNW_piclenvd",
};
static int get_disk_temp(env_disk_t *);
/*
* ES Segment stuff
*/
/*
* Default limits for sensors, in case ES segment is not present, or has
* inconsistent information
*/
{
},
{
},
{
},
{
},
{
},
{
},
{
},
{
},
{
}
};
/*
* Env thread variables
*/
/*
* PM thread related variables
*/
/*
* Envd plug-in verbose flag set by SUNW_PICLENVD_DEBUG environment var
* Setting the verbose tuneable also enables debugging for better
* control
*/
int env_debug = 0;
/*
* These are debug variables for keeping track of the total number
* of Fan and Temp sensor retries over the lifetime of the plugin.
*/
static int total_fan_retries = 0;
static int total_temp_retries = 0;
/*
* Fan devices
*/
};
};
};
};
};
/*
* Disk devices
*/
DISK0_ID, -1,
};
DISK1_ID, -1,
};
DISK2_ID, -1,
};
DISK3_ID, -1,
};
/*
* Sensors
*/
};
};
};
};
};
};
};
-1, NULL,
};
};
/*
* The vendor-id and device-id are the properties associated with
* the SCSI controller. This is used to identify a particular controller
* like LSI1064.
*/
/*
* The implementation for SCSI disk drives to supply info. about
* temperature is not mandatory. Hence we first determine if the
* temperature page is supported. To do this we need to scan the list
* of pages supported.
*/
#define SUPPORTED_LPAGES 0
/*
* NULL terminated array of fans
*/
};
/*
* NULL terminated array of disks
*/
};
/*
* NULL terminated array of temperature sensors
*/
};
/*
* Tuneables
*/
#define DISABLE 0
static int disk_high_shutdown_temperature =
static int shutdown_override = 0;
/* Disable disk temperature monitoring until we have LSI fw support */
int disk_temp_monitor = 0;
&get_int_val, &set_int_val, sizeof (int)},
&get_int_val, &set_int_val, sizeof (int)},
&get_int_val, &set_int_val, sizeof (int)},
&get_int_val, &set_int_val, sizeof (int)},
{"sensor-warning-duration", PICL_PTYPE_INT,
sizeof (int)},
{"disk-scan-interval", PICL_PTYPE_INT,
sizeof (int)},
{"fan-scan-interval", PICL_PTYPE_INT,
sizeof (int)},
{"sensor-scan-interval", PICL_PTYPE_INT,
sizeof (int)},
sizeof (int)},
sizeof (int)},
sizeof (int)},
sizeof (int)},
sizeof (int)},
sizeof (shutdown_cmd)},
&get_int_val, &set_int_val, sizeof (int)},
{"disk-high-warn-temperature", PICL_PTYPE_INT,
&set_int_val, sizeof (int)},
{"disk-low-warn-temperature", PICL_PTYPE_INT,
&set_int_val, sizeof (int)},
{"disk-high-shutdown-temperature", PICL_PTYPE_INT,
&set_int_val, sizeof (int)},
{"disk-low-shutdown-temperature", PICL_PTYPE_INT,
&set_int_val, sizeof (int)},
&get_int_val, &set_int_val, sizeof (int)}
};
/*
* We use this to figure out how many tuneables there are.
* This is a variable because the publishing routine in
* piclenvsetup.c needs this info.
*/
/*
* Lookup fan and return a pointer to env_fan_t data structure.
*/
{
int i;
return (fanp);
}
return (NULL);
}
/*
* Lookup sensor and return a pointer to env_sensor_t data structure.
*/
{
int i;
for (i = 0; i < N_ENVD_SENSORS; ++i) {
sensorp = envd_sensors[i];
return (sensorp);
}
return (NULL);
}
/*
* Lookup disk and return a pointer to env_disk_t data structure.
*/
{
int i;
return (diskp);
}
return (NULL);
}
/*
* Get current temperature
* Returns -1 on error, 0 if successful
*/
int
{
int retval = 0;
if (fd == -1)
retval = -1;
retval = -1;
}
(void) sleep(1);
}
if (total_temp_retries && env_debug) {
"Total retries for sensors = %d",
}
}
return (retval);
}
/*
* Get current disk temperature
* Returns -1 on error, 0 if successful
*/
int
{
int retval = 0;
retval = -1;
else
return (retval);
}
/*
* Get current fan speed
* This function returns an RPM value for fanspeed
* in fanspeedp.
* Returns -1 on error, 0 if successful
*/
int
{
int real_tach;
int retries;
return (-1);
if (has_fan_failed(fanp)) {
*fanspeedp = 0;
return (0);
}
/* try to read the fan information */
break;
(void) sleep(1);
}
if (retries >= MAX_FAN_RETRIES)
return (-1);
if (total_fan_retries && env_debug) {
}
return (0);
}
/*
* Set fan speed
* This function accepts a percentage of fan speed
* from 0-100 and programs the HW monitor fans to the corresponding
* fanspeed value.
* Returns -1 on error, -2 on invalid args passed, 0 if successful
*/
int
{
return (-1);
return (-2);
return (-1);
return (0);
}
/*
* close all fan devices
*/
static void
envd_close_fans(void)
{
int i;
/*
* NOTE(review): the per-fan loop and the statements that close each
* fan device are not visible in this view; only the closing braces of
* those nested scopes remain. Confirm against the full file.
*/
}
}
}
/*
* Close sensor devices and freeup resources
*/
static void
envd_close_sensors(void)
{
int i;
/* Visit every slot of the envd_sensors[] table. */
for (i = 0; i < N_ENVD_SENSORS; ++i) {
sensorp = envd_sensors[i];
/*
* NOTE(review): the declaration of sensorp and the statements
* that close the sensor and release its resources are not
* visible in this view. Confirm against the full file.
*/
}
}
}
/*
* Open fan devices and initialize per fan data structure.
*/
static int
envd_setup_fans(void)
{
/*
* Returns 0 when at least one fan was counted in fancnt, -1 when
* none was.
* NOTE(review): the per-fan loop header, the PICL path lookups and
* the device open calls are not visible in this view; the comments
* below describe only the statements that remain.
*/
int i, fd;
int fancnt = 0;
/* Make sure cpu0/1 present for validating cpu fans */
PICL_SUCCESS) {
if (env_debug) {
"get node by path failed for %s\n",
}
/* The lookup did not return PICL_SUCCESS: skip this fan. */
continue;
}
}
PICL_SUCCESS) {
if (env_debug) {
"get node by path failed for %s\n", CPU0_PATH);
}
/* Same handling for the second lookup: skip this fan. */
continue;
}
}
continue;
}
/* Only fans that were not skipped above are counted. */
fancnt++;
}
/* Having no counted fan at all is reported to the caller as an error. */
if (fancnt == 0)
return (-1);
return (0);
}
static int
envd_setup_disks(void)
{
/*
* Returns -1 from each of the three early controller checks below and
* 0 once the per-disk pass has finished, even if individual disks were
* skipped.
* NOTE(review): local declarations, the controller property reads and
* the per-disk loop header are not visible in this view.
*/
&tnodeh) != PICL_SUCCESS) {
if (env_debug) {
"not found in the system.\n",
}
return (-1);
}
if (env_debug) {
/*
* NOTE(review): the format prints errno with %d behind a 0x prefix;
* the disk open message further down uses 0x%x. Confirm which is
* intended.
*/
"for SCSI controller. ret = %d errno = 0x%d\n",
}
return (-1);
}
if (env_debug) {
"for SCSI controller. ret = %d errno = 0x%d\n",
}
return (-1);
}
/*
* We have found LSI1064 SCSI controller onboard.
*/
&tnodeh) != PICL_SUCCESS) {
if (env_debug) {
"DISK %d: %s not found in the system.\n",
}
/* Node lookup failed for this disk: move on to the next one. */
continue;
}
if (env_debug) {
"Error in opening %s errno = 0x%x\n",
}
/* Open failed for this disk: move on to the next one. */
continue;
}
/* Start with no warning or shutdown condition recorded for the disk. */
diskp->warning_tstamp = 0;
diskp->shutdown_tstamp = 0;
/*
* Find out if the Temperature page is supported by the disk.
*/
sizeof (log_page), 1) == 0) {
/* Scan the returned page list, starting after the log page header. */
for (page_index = LOGPAGEHDRSIZE;
page_index++) {
continue;
if (env_debug) {
"tpage supported for %s\n",
}
}
}
/*
* If the temp log page failed, we can check if this is
* a SATA drive and attempt to read the temperature
* using the SMART interface.
*/
if (env_debug)
/* Notification, only when requested */
/*
* Since we know this is a SMART capable
* drive, we will try to set the page and
* determine if the drive is not capable
* of reading the TEMP page when we
* try to read the temperature and disable
* it then. We do not fail when reading
* or writing this page because we will
* determine the SMART capabilities
* when reading the temperature.
*/
"Failed to set mode page");
}
}
/*
* Take a first temperature reading for the disk.
* NOTE(review): the bodies of both branches are not visible here.
*/
if (get_disk_temp(diskp) < 0) {
} else if (env_debug) {
}
}
return (0);
}
static int
envd_es_setup(void)
{
/*
* Every failure path visible below returns -1; the function returns 0
* only after the final cleanup step. envd_setup() tests the result
* with < 0.
* NOTE(review): most declarations, the open/read calls and the cleanup
* statements are not visible in this view.
*/
char *envsegp;
/*
* Open the front io fru
*/
return (-1);
}
/*
* Read section header from the fru SEEPROM
*/
return (-1);
}
return (-1);
}
/*
* Locate environmental segment
*/
for (i = 0; i < scn_hdr.sscn_nsegs; i++) {
return (-1);
}
if (env_debug) {
"Seg name: %x off:%x len:%x\n",
}
/* Leaving the loop early keeps i below sscn_nsegs. */
break;
}
/* The loop ran to completion without a break: segment not found. */
if (i == scn_hdr.sscn_nsegs) {
return (-1);
}
/*
* Read environmental segment
*/
return (-1);
}
return (-1);
}
/*
* Check environmental segment data for consistency
*/
return (-1);
}
/*
* Process environmental segment data
*/
return (-1);
}
/* One pass per sensor block recorded in the segment. */
for (i = 0; i < envseg->esd_nsensors; i++) {
if (env_debug) {
}
/* Reject sensor ids that do not fit below MAX_SENSORS. */
if (ess_id >= MAX_SENSORS) {
return (-1);
}
sizeof (es_sensor_blk_t));
sensorp++;
}
/*
* NOTE(review): the text of this comment and the body of the loop
* below are missing from this view; the loop visits every
* envd_sensors[] index.
*/
for (i = 0; i < N_ENVD_SENSORS; i++) {
}
/*
* Cleanup and return
*/
return (0);
}
static void
envd_es_default_setup(void)
{
int i, id;
/*
* One pass per envd_sensors[] index.
* NOTE(review): the loop body is not visible in this view, and id is
* unused in what remains. Presumably this installs the default sensor
* limits described earlier in the file; confirm against the full file.
*/
for (i = 0; i < N_ENVD_SENSORS; i++) {
}
}
/*
* Open temperature sensor devices and initialize per sensor data structure.
*/
static int
envd_setup_sensors(void)
{
/*
* Returns 0 when at least one sensor was counted in sensorcnt, -1 when
* none was.
* NOTE(review): the declaration of sensorp, the PICL path lookups, the
* device open and the retry loop header are not visible in this view.
*/
int sensorcnt = 0;
int i;
for (i = 0; i < N_ENVD_SENSORS; i++) {
/*
* NOTE(review): the debug statement that belongs to this if is
* missing here, which makes the assignment below look conditional.
* Confirm against the full file.
*/
if (env_debug)
sensorp = envd_sensors[i];
/* Initialize sensor's initial state */
sensorp->warning_tstamp = 0;
sensorp->shutdown_tstamp = 0;
/* Make sure cpu0/1 sensors are present */
PICL_SUCCESS) {
if (env_debug) {
"get node by path failed for %s\n",
}
/* The lookup did not return PICL_SUCCESS: skip this sensor. */
continue;
}
}
PICL_SUCCESS) {
if (env_debug) {
"get node by path failed for %s\n",
}
/* Same handling for the second lookup: skip this sensor. */
continue;
}
}
if (env_debug) {
}
continue;
}
/*
* Determine if the front panel is attached, we want the
* information if it exists, but should not shut down
* the system if it is removed.
*/
int tries;
&temp) == 0) {
/* A zero result ends the retry loop early. */
break;
}
/* Wait one second before the next attempt. */
(void) sleep(1);
}
/* tries reaching MAX_SENSOR_RETRIES means no attempt ended the loop. */
if (tries == MAX_SENSOR_RETRIES)
}
/* Only sensors that were not skipped above are counted. */
sensorcnt++;
}
/* Having no counted sensor at all is reported to the caller as an error. */
if (sensorcnt == 0)
return (-1);
return (0);
}
/* ARGSUSED */
static void *
{
int pre_lpstate;
int env_monitor_fd;
cur_lpstate = 0;
pre_lpstate = 1;
if (pm_fd == -1) {
return (NULL);
}
for (;;) {
/*
* Get PM state change events to check if the system
* is in lowest power state and inform PIC which controls
* fan speeds.
*
* To minimize polling, we use the blocking interface
* to get the power state change event here.
*/
break;
continue;
}
do {
if (env_debug) {
"pmstate event:0x%x flags:%x"
"comp:%d oldval:%d newval:%d path:%s\n",
}
if (pre_lpstate != cur_lpstate) {
if (env_debug)
"setting PIC ESTAR SATE to %x\n",
if (env_monitor_fd != -1) {
&estar_state) < 0) {
if (env_debug)
"unable to set ESTAR_MODE in PIC\n");
}
(void) close(env_monitor_fd);
} else {
if (env_debug)
"Failed to open %s\n",
}
}
}
/*NOTREACHED*/
return (NULL);
}
/*
* This is the env thread, which monitors the current temperature when
* the warning threshold is exceeded. Its job is to force a shutdown
* before the hardware powers off via the THERM interrupt.
*/
/*ARGSUSED*/
static void *
{
int ret, i;
for (;;) {
/*
* Sleep for specified seconds before issuing IOCTL
* again.
*/
(void) pthread_mutex_lock(&env_monitor_mutex);
&env_monitor_mutex, &to);
(void) pthread_mutex_unlock(&env_monitor_mutex);
continue;
}
(void) pthread_mutex_unlock(&env_monitor_mutex);
for (i = 0; i < N_ENVD_SENSORS; i++) {
sensorp = envd_sensors[i];
continue;
continue;
if (env_debug) {
"%s temp = %d",
}
/*
* If this sensor already triggered system shutdown, skip it.
*/
if (sensorp->shutdown_initiated)
continue;
/*
* Check for the temperature in warning and shutdown
* range and take appropriate action.
*/
sensorp)) {
/*
* Check if the temperature has been in
* warning range during last
* sensor_warning_duration interval. If so,
* the temperature is truly in warning range
* and we need to log a warning message, but
* no more than once every
* sensor_warning_interval seconds.
*/
if (sensorp->warning_start == 0)
(int8_t)
(int8_t)
}
} else if (sensorp->warning_start != 0)
sensorp->warning_start = 0;
if (!shutdown_override &&
sensorp)) {
if (sensorp->shutdown_tstamp == 0)
/*
* Shutdown the system if the temperature
* remains in the shutdown range for over
* sensor_shutdown_interval seconds.
*/
/*
* Log error
*/
(int8_t)
(int8_t)
/*
* Shutdown the system (only once)
*/
if (system_shutdown_started ==
B_FALSE) {
sizeof (syscmd),
"%s \"%s\"", shutdown_cmd,
msgbuf);
}
}
} else if (sensorp->shutdown_tstamp != 0)
sensorp->shutdown_tstamp = 0;
}
} /* end of forever loop */
/*NOTREACHED*/
return (NULL);
}
static int
{
int ret_val;
cdb_buf[0] = SCMD_LOG_SENSE_G1;
/*
* For SATA we need to have the current threshold value set.
* For SAS drives we can use the current cumulative value.
* This is set for non-SMART drives, by passing a non-zero
* page_control.
*/
if (page_control)
else
if (env_debug)
"log sense command for page_code 0x%x succeeded\n", page_code);
return (ret_val);
}
if (env_debug)
"page_code 0x%x ret_val = 0x%x "
return (1);
}
static int
{
int ret;
int i;
if (ret != 0) {
return (-1);
}
/*
* Verify the checksum of the data. The 2's complement of the
* sum of the preceding bytes is stored in the last byte, so
* the sum of all 512 bytes should be 0. If the checksum is
* bad, return an error for this iteration.
*/
for (i = checksum = 0; i < 512; i++)
"SMART checksum error! 0x%x\n", checksum);
/*
* We got bad data back from the drive, fail this
* time around and picl will retry again. If this
* continues to fail picl will give this drive a
* failed status.
*/
return (-1);
}
/*
* Scan through the various SMART data and look for
* the complete drive temp.
*/
for (i = 0; (i < SMART_FIELDS) &&
(temp_attrib == NULL); i++) {
}
}
/*
* If we don't find any temp SMART attributes, this drive
* does not support this page, disable temp checking
* for this drive.
*/
if (temp_attrib == NULL) {
/*
* If the checksum is valid, the temp. attributes are
* not supported, disable this drive from temp.
* checking.
*/
if (env_debug)
"Temp ATTRIBUTE not supported\n");
return (-1);
}
if (env_debug) {
"data = 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
}
} else {
return (-1);
}
} else {
sizeof (tpage), 1);
if (ret != 0) {
return (-1);
}
/*
* For the current temperature verify that the parameter
* length is 0x02 and the parameter code is 0x00
* Temperature value of 255(0xFF) is considered INVALID.
*/
return (-1);
} else {
}
}
/*
* For the reference temperature verify that the parameter
* length is 0x02 and the parameter code is 0x01
* Temperature value of 255(0xFF) is considered INVALID.
*/
} else {
}
}
}
return (0);
}
/* ARGSUSED */
static void *
{
int ret, i;
int idle_time;
int disk_pm_fd;
return (NULL);
}
for (;;) {
/*
* Sleep for specified seconds before issuing IOCTL
* again.
*/
(void) pthread_mutex_lock(&env_monitor_mutex);
&env_monitor_mutex, &to);
(void) pthread_mutex_unlock(
continue;
}
(void) pthread_mutex_unlock(&env_monitor_mutex);
continue;
continue;
/*
* If the disk temperature is above the warning threshold
* continue monitoring until the temperature drops below
* warning threshold.
* if the temperature is in the NORMAL range monitor only
* when the disk is BUSY.
* We do not want to read the disk temperature if the disk is
* idling, because the disk will never get into its lowest power
* mode if we scan the disk temperature periodically. To avoid
* this situation we first determine
* the idle_time of the disk. If the disk has been IDLE since
* we scanned the temperature last time we will not read the
* temperature.
*/
if ((idle_time =
&pmstate)) == -1) {
if (env_debug)
"ioctl PM_GET_TIME_IDLE failed for DISK0. errno=0x%x\n",
errno);
continue;
}
continue;
}
if (env_debug) {
}
continue;
}
}
if (ret != 0)
continue;
if (env_debug) {
}
/*
* If this disk already triggered system shutdown, skip it.
*/
if (diskp->shutdown_initiated)
continue;
/*
* Check for the temperature in warning and shutdown range
* and take appropriate action.
*/
/*
* Check if the temperature has been in warning
* range during last disk_warning_duration interval.
* If so, the temperature is truly in warning
* range and we need to log a warning message,
* but no more than once every disk_warning_interval
* seconds.
*/
if (diskp->warning_start == 0)
disk_warning_duration) && (wtstamp == 0 ||
}
} else if (diskp->warning_start != 0)
diskp->warning_start = 0;
if (!shutdown_override &&
if (diskp->shutdown_tstamp == 0)
/*
* Shutdown the system if the temperature remains
* in the shutdown range for over disk_shutdown_interval
* seconds.
*/
/* log error */
/* shutdown the system (only once) */
if (system_shutdown_started == B_FALSE) {
}
}
} else if (diskp->shutdown_tstamp != 0)
diskp->shutdown_tstamp = 0;
}
} /* end of forever loop */
}
static void *
{
int ret, i;
#ifdef __lint
#endif
for (;;) {
/*
* Sleep for specified seconds before issuing IOCTL
* again.
*/
(void) pthread_mutex_lock(&env_monitor_mutex);
&env_monitor_mutex, &to);
(void) pthread_mutex_unlock(&env_monitor_mutex);
continue;
}
(void) pthread_mutex_unlock(&env_monitor_mutex);
continue;
continue;
} else {
continue;
}
}
if (has_psufan_failed() == B_TRUE) {
if (psufan_last_status == FAN_FAILED)
continue;
} else {
if (psufan_last_status == FAN_OK)
continue;
}
}
/*NOTREACHED*/
return (NULL);
}
/*
* Setup environmental monitor state and start threads to monitor
* temperature, fan, disk and power management state.
* Returns -1 on error, 0 if successful.
*/
static int
envd_setup(void)
{
/*
* Returns -1 only from the thread attribute initialization check at
* the top; every later setup failure just clears the matching monitor
* flag and the function still returns 0.
* NOTE(review): the pthread_create calls and all log statements are
* not visible in this view.
*/
/*
* NOTE(review): this assignment looks unconditional here. An earlier
* comment in the file says the flag is set by the SUNW_PICLENVD_DEBUG
* environment variable, so the guarding test is presumably missing
* from this view. Confirm against the full file.
*/
env_debug = 1;
if (pthread_attr_init(&thr_attr) != 0 ||
return (-1);
}
/*
* If ES segment is not present or has inconsistent information, we
* use default values for sensor limits. For the sake of simplicity,
* we still store these limits internally in the 'es' member in the
* structure.
*/
if (envd_es_setup() < 0) {
}
/*
* NOTE(review): in each block below the debug statement that belongs
* to if (env_debug) is missing, which makes the first flag reset look
* conditional on env_debug. Confirm that the monitor flags are cleared
* unconditionally in the full file.
*/
if (envd_setup_sensors() < 0) {
if (env_debug)
system_temp_monitor = 0;
}
/* Fan setup failure also turns off PM state monitoring. */
if (envd_setup_fans() < 0) {
if (env_debug)
fan_monitor = 0;
pm_monitor = 0;
}
/*
* Disable disk temperature monitoring until we have
* LSI fw support to read SATA disk temperature
*/
if (disk_temp_monitor) {
if (envd_setup_disks() < 0) {
if (env_debug)
disk_temp_monitor = 0;
}
}
/*
* Create a thread to monitor system temperatures
*/
system_temp_thr, NULL) != 0) {
} else {
if (env_debug)
"Created thread to monitor system temperatures\n");
}
}
/*
* Create a thread to monitor fans
*/
else {
if (env_debug) {
"Created thread to monitor system fans\n");
}
}
}
/*
* Create a thread to monitor PM state
*/
else {
if (env_debug)
"Created thread to monitor system power state\n");
}
}
/*
* Create a thread to monitor disk temperature
*/
disk_temp_thr, NULL) != 0) {
} else {
if (env_debug)
"Created thread for disk temperatures\n");
}
}
return (0);
}
static void
piclenvd_register(void)
{
/*
* This function is named in the #pragma init directive near the top of
* the file.
* NOTE(review): the body is empty in this view; presumably the call
* that hands the plugin registration information to the PICL daemon
* is missing. Confirm against the full file.
*/
}
static void
piclenvd_init(void)
{
/*
* PICL plugin init entry point. Calls env_picl_setup_tuneables(),
* forces disk temperature monitoring off and then runs envd_setup().
*/
/* The return value of the tuneables setup is deliberately ignored. */
(void) env_picl_setup_tuneables();
/*
* Do not allow disk temperature monitoring to be enabled
* via tuneables. Disk temperature monitoring is disabled
* until we have LSI fw support to read the temperature of
* SATA disks
*/
disk_temp_monitor = 0;
/*
* Setup the environmental data structures
*/
if (envd_setup() != 0) {
/* Setup failed: stop here without doing anything further. */
return;
}
/*
* NOTE(review): the text of this comment and the statement it
* introduced are missing from this view; presumably this is where
* env_picl_setup(), declared extern above, is called. Confirm against
* the full file.
*/
}
static void
piclenvd_fini(void)
{
/*
* Invoke env_picl_destroy() to remove any PICL nodes/properties
* (including volatile properties) we created. Once this call
* returns, there can't be any more calls from the PICL framework
* to get current temperature or fan speed.
*/
/*
* NOTE(review): the env_picl_destroy() call described above, and any
* cleanup that followed it, are not visible in this view; the body
* contains no statements here. Confirm against the full file.
*/
}
/*VARARGS2*/
void
{
}
/*
* Tunables support functions
*/
static env_tuneable_t *
{
int i;
for (i = 0; i < ntuneables; i++) {
return (tuneablep);
}
return (NULL);
}
static int
{
return (PICL_FAILURE);
return (PICL_SUCCESS);
}
static int
{
return (PICL_PERMDENIED);
return (PICL_FAILURE);
return (PICL_SUCCESS);
}
static int
{
return (PICL_FAILURE);
return (PICL_SUCCESS);
}
static int
{
return (PICL_PERMDENIED);
return (PICL_FAILURE);
return (PICL_SUCCESS);
}
{
int real_tach;
return (B_TRUE);
/*
* Read RF_FAN_STATUS bit of the fan fault register, retry if
* the PIC is busy, with a 1 second delay to allow it to update.
*/
break;
(void) sleep(1);
}
if (ntries > 0) {
if (env_debug) {
"%d retries attempted in reading fan status.\n",
ntries);
}
}
if (ntries == MAX_RETRIES_FOR_FAN_FAULT) {
sizeof (fan_status_string));
sizeof (fan_rpm_string));
return (B_TRUE);
}
if (env_debug)
/*
* ST_FFAULT bit isn't implemented yet and we're reading only
* individual fan status
*/
if (status & 0x1) {
"0x%x", status);
sizeof (fan_rpm_string));
} else {
"%d", fan_speed);
}
return (B_TRUE);
}
return (B_FALSE);
}
has_psufan_failed(void)
{
return (B_FALSE);
/*
* For psu, only fan fault is visible, no fan speed
*/
/*
* Read RF_FAN_STATUS bit of the fan fault register, retry if
* the PIC is busy, with a 1 second delay to allow it to update.
*/
break;
(void) sleep(1);
}
if (ntries > 0) {
if (env_debug) {
"%d retries attempted in reading fan status.\n",
ntries);
}
}
if (ntries == MAX_RETRIES_FOR_FAN_FAULT) {
sizeof (fan_status_string));
return (B_TRUE);
}
if (env_debug)
if (status & 0x1) {
"0x%x", status);
return (B_TRUE);
}
return (B_FALSE);
}
static int
{
int ret_val;
cdb_buf[0] = SCMD_MODE_SELECT_G1;
return (ret_val);
}
if (env_debug)
"page_code 0x%x ret_val = 0x%x "
return (1);
}