cpudrv.c revision c210ded489f4b4bd3e26ba118f5603c41a5c8c02
5cff782560a1c3cf913ba5574a5123a299f3315emh * CDDL HEADER START
5cff782560a1c3cf913ba5574a5123a299f3315emh * The contents of this file are subject to the terms of the
5cff782560a1c3cf913ba5574a5123a299f3315emh * Common Development and Distribution License (the "License").
5cff782560a1c3cf913ba5574a5123a299f3315emh * You may not use this file except in compliance with the License.
5cff782560a1c3cf913ba5574a5123a299f3315emh * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
5cff782560a1c3cf913ba5574a5123a299f3315emh * See the License for the specific language governing permissions
5cff782560a1c3cf913ba5574a5123a299f3315emh * and limitations under the License.
5cff782560a1c3cf913ba5574a5123a299f3315emh * When distributing Covered Code, include this CDDL HEADER in each
5cff782560a1c3cf913ba5574a5123a299f3315emh * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
5cff782560a1c3cf913ba5574a5123a299f3315emh * If applicable, add the following below this CDDL HEADER, with the
5cff782560a1c3cf913ba5574a5123a299f3315emh * fields enclosed by brackets "[]" replaced with your own identifying
5cff782560a1c3cf913ba5574a5123a299f3315emh * information: Portions Copyright [yyyy] [name of copyright owner]
5cff782560a1c3cf913ba5574a5123a299f3315emh * CDDL HEADER END
c210ded489f4b4bd3e26ba118f5603c41a5c8c02esaxe * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Use is subject to license terms.
5cff782560a1c3cf913ba5574a5123a299f3315emh#pragma ident "%Z%%M% %I% %E% SMI"
5cff782560a1c3cf913ba5574a5123a299f3315emh * CPU Device driver. The driver is not DDI-compliant.
5cff782560a1c3cf913ba5574a5123a299f3315emh * The driver supports the following features:
5cff782560a1c3cf913ba5574a5123a299f3315emh * - Power management.
5cff782560a1c3cf913ba5574a5123a299f3315emh * CPU power management
5cff782560a1c3cf913ba5574a5123a299f3315emh * The supported power saving model is to slow down the CPU (on SPARC by
5cff782560a1c3cf913ba5574a5123a299f3315emh * dividing the CPU clock and on x86 by dropping down a P-state).
5cff782560a1c3cf913ba5574a5123a299f3315emh * Periodically we determine the amount of time the CPU is running
5cff782560a1c3cf913ba5574a5123a299f3315emh * idle thread and threads in user mode during the last quantum. If the idle
5cff782560a1c3cf913ba5574a5123a299f3315emh * thread was running less than its low water mark for current speed for
5cff782560a1c3cf913ba5574a5123a299f3315emh * number of consecutive sampling periods, or number of running threads in
5cff782560a1c3cf913ba5574a5123a299f3315emh * user mode are above its high water mark, we arrange to go to the higher
5cff782560a1c3cf913ba5574a5123a299f3315emh * speed. If the idle thread was running more than its high water mark without
5cff782560a1c3cf913ba5574a5123a299f3315emh * dropping a number of consecutive times below the mark, and number of threads
5cff782560a1c3cf913ba5574a5123a299f3315emh * running in user mode are below its low water mark, we arrange to go to the
5cff782560a1c3cf913ba5574a5123a299f3315emh * next lower speed. While going down, we go through all the speeds. While
5cff782560a1c3cf913ba5574a5123a299f3315emh * going up we go to the maximum speed to minimize impact on the user, but have
5cff782560a1c3cf913ba5574a5123a299f3315emh * provisions in the driver to go to other speeds.
5cff782560a1c3cf913ba5574a5123a299f3315emh * The driver does not have knowledge of a particular implementation of this
5cff782560a1c3cf913ba5574a5123a299f3315emh * scheme and will work with all CPUs supporting this model. On SPARC, the
5cff782560a1c3cf913ba5574a5123a299f3315emh * driver determines supported speeds by looking at 'clock-divisors' property
5cff782560a1c3cf913ba5574a5123a299f3315emh * created by OBP. On x86, the driver retrieves the supported speeds from
5cff782560a1c3cf913ba5574a5123a299f3315emh * Configuration function prototypes and data structures
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic int cpudrv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic int cpudrv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic int cpudrv_power(dev_info_t *dip, int comp, int level);
5cff782560a1c3cf913ba5574a5123a299f3315emh 0, /* refcnt */
5cff782560a1c3cf913ba5574a5123a299f3315emh * Function prototypes
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic int cpudrv_pm_comp_create(cpudrv_devstate_t *cpudsp);
5cff782560a1c3cf913ba5574a5123a299f3315emh * Driver global variables
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic uint_t cpudrv_pm_idle_buf_zone = CPUDRV_PM_IDLE_BUF_ZONE;
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic uint_t cpudrv_pm_idle_bhwm_cnt_max = CPUDRV_PM_IDLE_BHWM_CNT_MAX;
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic uint_t cpudrv_pm_idle_blwm_cnt_max = CPUDRV_PM_IDLE_BLWM_CNT_MAX;
5cff782560a1c3cf913ba5574a5123a299f3315emh * cpudrv_direct_pm allows user applications to directly control the
5cff782560a1c3cf913ba5574a5123a299f3315emh * power state transitions (direct pm) without following the normal
5cff782560a1c3cf913ba5574a5123a299f3315emh * direct pm protocol. This is needed because the normal protocol
5cff782560a1c3cf913ba5574a5123a299f3315emh * requires that a device only be lowered when it is idle, and be
5cff782560a1c3cf913ba5574a5123a299f3315emh * brought up when it requests to do so by calling pm_raise_power().
5cff782560a1c3cf913ba5574a5123a299f3315emh * Ignoring this protocol is harmless for CPU (other than speed).
5cff782560a1c3cf913ba5574a5123a299f3315emh * Moreover it might be the case that CPU is never idle or wants
5cff782560a1c3cf913ba5574a5123a299f3315emh * to be at higher speed because of the additional CPU cycles required
5cff782560a1c3cf913ba5574a5123a299f3315emh * to run the user application.
5cff782560a1c3cf913ba5574a5123a299f3315emh * The driver will still report idle/busy status to the framework. Although
5cff782560a1c3cf913ba5574a5123a299f3315emh * framework will ignore this information for direct pm devices and not
5cff782560a1c3cf913ba5574a5123a299f3315emh * try to bring them down when idle, user applications can still use this
5cff782560a1c3cf913ba5574a5123a299f3315emh * information if they want.
5cff782560a1c3cf913ba5574a5123a299f3315emh * In the future, provide an ioctl to control setting of this mode. In
5cff782560a1c3cf913ba5574a5123a299f3315emh * that case, this variable should move to the state structure and
5cff782560a1c3cf913ba5574a5123a299f3315emh * be protected by the lock in the state structure.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Arranges for the handler function to be called at the interval suitable
5cff782560a1c3cf913ba5574a5123a299f3315emh * for current speed.
5cff782560a1c3cf913ba5574a5123a299f3315emh (cpudsp)->cpudrv_pm.timeout_id = timeout(cpudrv_pm_monitor_disp, \
5cff782560a1c3cf913ba5574a5123a299f3315emh * Arranges for the handler function not to be called back.
5cff782560a1c3cf913ba5574a5123a299f3315emh sizeof (cpudrv_devstate_t), 0)) != 0) {
5cff782560a1c3cf913ba5574a5123a299f3315emh * Callbacks used by the PPM driver.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Driver attach(9e) entry point.
5cff782560a1c3cf913ba5574a5123a299f3315emh switch (cmd) {
5cff782560a1c3cf913ba5574a5123a299f3315emh if ((cpudsp = ddi_get_soft_state(cpudrv_state, instance)) ==
5cff782560a1c3cf913ba5574a5123a299f3315emh * Find CPU number for this dev_info node.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Taskq is used to dispatch routine to monitor CPU activities.
5cff782560a1c3cf913ba5574a5123a299f3315emh "cpudrv_pm_monitor",
5cff782560a1c3cf913ba5574a5123a299f3315emh mutex_init(&cpudsp->cpudrv_pm.timeout_lock, NULL, MUTEX_DRIVER,
5cff782560a1c3cf913ba5574a5123a299f3315emh cv_init(&cpudsp->cpudrv_pm.timeout_cv, NULL, CV_DEFAULT, NULL);
5cff782560a1c3cf913ba5574a5123a299f3315emh * Driver needs to assume that CPU is running at unknown speed
5cff782560a1c3cf913ba5574a5123a299f3315emh * at DDI_ATTACH and switch it to the needed speed. We assume
5cff782560a1c3cf913ba5574a5123a299f3315emh * that initial needed speed is full speed for us.
5cff782560a1c3cf913ba5574a5123a299f3315emh * We need to take the lock because cpudrv_pm_monitor()
5cff782560a1c3cf913ba5574a5123a299f3315emh * will start running in parallel with attach().
5cff782560a1c3cf913ba5574a5123a299f3315emh * We don't call pm_raise_power() directly from attach because
5cff782560a1c3cf913ba5574a5123a299f3315emh * driver attach for a slave CPU node can happen before the
5cff782560a1c3cf913ba5574a5123a299f3315emh * CPU is even initialized. We just start the monitoring
5cff782560a1c3cf913ba5574a5123a299f3315emh * system which understands unknown speed and moves CPU
5cff782560a1c3cf913ba5574a5123a299f3315emh * to targ_spd when it has been initialized.
5cff782560a1c3cf913ba5574a5123a299f3315emh if ((cpudsp = ddi_get_soft_state(cpudrv_state, instance)) ==
5cff782560a1c3cf913ba5574a5123a299f3315emh * Driver needs to assume that CPU is running at unknown speed
5cff782560a1c3cf913ba5574a5123a299f3315emh * at DDI_RESUME and switch it to the needed speed. We assume
5cff782560a1c3cf913ba5574a5123a299f3315emh * that the needed speed is full speed for us.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Driver detach(9e) entry point.
5cff782560a1c3cf913ba5574a5123a299f3315emh switch (cmd) {
5cff782560a1c3cf913ba5574a5123a299f3315emh * If the only thing supported by the driver is power
5cff782560a1c3cf913ba5574a5123a299f3315emh * management, we can in future enhance the driver and
5cff782560a1c3cf913ba5574a5123a299f3315emh * framework that loads it to unload the driver when
5cff782560a1c3cf913ba5574a5123a299f3315emh * user has disabled CPU power management.
5cff782560a1c3cf913ba5574a5123a299f3315emh if ((cpudsp = ddi_get_soft_state(cpudrv_state, instance)) ==
5cff782560a1c3cf913ba5574a5123a299f3315emh * During a checkpoint-resume sequence, framework will
5cff782560a1c3cf913ba5574a5123a299f3315emh * stop interrupts to quiesce kernel activity. This will
5cff782560a1c3cf913ba5574a5123a299f3315emh * leave our monitoring system ineffective. Handle this
5cff782560a1c3cf913ba5574a5123a299f3315emh * by stopping our monitoring system and bringing CPU
5cff782560a1c3cf913ba5574a5123a299f3315emh * to full speed. In case we are in special direct pm
5cff782560a1c3cf913ba5574a5123a299f3315emh * mode, we leave the CPU at whatever speed it is. This
5cff782560a1c3cf913ba5574a5123a299f3315emh * is harmless other than speed.
5cff782560a1c3cf913ba5574a5123a299f3315emh DPRINTF(D_DETACH, ("cpudrv_detach: instance %d: DDI_SUSPEND - "
5cff782560a1c3cf913ba5574a5123a299f3315emh if (!cpudrv_direct_pm && (cpupm->cur_spd != cpupm->head_spd)) {
5cff782560a1c3cf913ba5574a5123a299f3315emh "instance %d: can't busy CPU "
5cff782560a1c3cf913ba5574a5123a299f3315emh * Driver power(9e) entry point.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Driver's notion of current power is set *only* in power(9e) entry point
5cff782560a1c3cf913ba5574a5123a299f3315emh * after actual power change operation has been successfully completed.
5cff782560a1c3cf913ba5574a5123a299f3315emh/* ARGSUSED */
5cff782560a1c3cf913ba5574a5123a299f3315emh DPRINTF(D_POWER, ("cpudrv_power: instance %d: level %d\n",
5cff782560a1c3cf913ba5574a5123a299f3315emh if ((cpudsp = ddi_get_soft_state(cpudrv_state, instance)) == NULL) {
5cff782560a1c3cf913ba5574a5123a299f3315emh cmn_err(CE_WARN, "cpudrv_power: instance %d: can't get state",
5cff782560a1c3cf913ba5574a5123a299f3315emh * In normal operation, we fail if we are busy and request is
5cff782560a1c3cf913ba5574a5123a299f3315emh * to lower the power level. We let this go through if the driver
5cff782560a1c3cf913ba5574a5123a299f3315emh * is in special direct pm mode. On x86, we also let this through
5cff782560a1c3cf913ba5574a5123a299f3315emh * if the change is due to a request to throttle the max speed.
5cff782560a1c3cf913ba5574a5123a299f3315emh for (new_spd = cpupm->head_spd; new_spd; new_spd = new_spd->down_spd) {
5cff782560a1c3cf913ba5574a5123a299f3315emh * We currently refuse to power manage if the CPU is not ready to
5cff782560a1c3cf913ba5574a5123a299f3315emh * take cross calls (cross calls fail silently if CPU is not ready
5cff782560a1c3cf913ba5574a5123a299f3315emh * for it).
5cff782560a1c3cf913ba5574a5123a299f3315emh * Additionally, for x86 platforms we cannot power manage
5cff782560a1c3cf913ba5574a5123a299f3315emh * any one instance, until all instances have been initialized.
5cff782560a1c3cf913ba5574a5123a299f3315emh * That's because we don't know what the CPU domains look like
5cff782560a1c3cf913ba5574a5123a299f3315emh * until all instances have been initialized.
5cff782560a1c3cf913ba5574a5123a299f3315emh } else if (!(is_ready = cpudrv_pm_all_instances_ready())) {
5cff782560a1c3cf913ba5574a5123a299f3315emh * Execute CPU specific routine on the requested CPU to change its
5cff782560a1c3cf913ba5574a5123a299f3315emh if ((ret = cpudrv_pm_change_speed(cpudsp, new_spd)) != DDI_SUCCESS) {
5cff782560a1c3cf913ba5574a5123a299f3315emh cmn_err(CE_WARN, "cpudrv_power: cpudrv_pm_change_speed() "
c210ded489f4b4bd3e26ba118f5603c41a5c8c02esaxe * DTrace probe point for CPU speed change transition
c210ded489f4b4bd3e26ba118f5603c41a5c8c02esaxe DTRACE_PROBE3(cpu__change__speed, cpudrv_devstate_t *, cpudsp,
5cff782560a1c3cf913ba5574a5123a299f3315emh * Reset idle threshold time for the new power level.
5cff782560a1c3cf913ba5574a5123a299f3315emh if ((cpupm->cur_spd != NULL) && (level < cpupm->cur_spd->pm_level)) {
5cff782560a1c3cf913ba5574a5123a299f3315emh * Reset various parameters because we are now running at new speed.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Initialize the field that will be used for reporting
5cff782560a1c3cf913ba5574a5123a299f3315emh * the supported_frequencies_Hz cpu_info kstat.
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic void
5cff782560a1c3cf913ba5574a5123a299f3315emh#define UINT64_MAX_STRING (sizeof ("18446744073709551615"))
5cff782560a1c3cf913ba5574a5123a299f3315emh speeds = kmem_zalloc(cpupm->num_spd * sizeof (uint64_t), KM_SLEEP);
5cff782560a1c3cf913ba5574a5123a299f3315emh CPUDRV_PM_SPEED_HZ(cp->cpu_type_info.pi_clock, spd->speed);
5cff782560a1c3cf913ba5574a5123a299f3315emh supp_freqs = kmem_zalloc((UINT64_MAX_STRING * cpupm->num_spd),
68afbec1fabe0d352bb5ab4ed82c44b58ec651fbmh kmem_free(supp_freqs, (UINT64_MAX_STRING * cpupm->num_spd));
5cff782560a1c3cf913ba5574a5123a299f3315emh * Initialize power management data.
5cff782560a1c3cf913ba5574a5123a299f3315emh /* Need at least two speeds to power manage */
5cff782560a1c3cf913ba5574a5123a299f3315emh * Calculate the watermarks and other parameters based on the
5cff782560a1c3cf913ba5574a5123a299f3315emh * supplied speeds.
5cff782560a1c3cf913ba5574a5123a299f3315emh * One of the basic assumptions is that for X amount of CPU work,
5cff782560a1c3cf913ba5574a5123a299f3315emh * if CPU is slowed down by a factor of N, the time it takes to
5cff782560a1c3cf913ba5574a5123a299f3315emh * do the same work will be N * X.
5cff782560a1c3cf913ba5574a5123a299f3315emh * The driver declares that a CPU is idle and ready to be slowed down,
5cff782560a1c3cf913ba5574a5123a299f3315emh * if amount of idle thread is more than the current speed idle_hwm
5cff782560a1c3cf913ba5574a5123a299f3315emh * without dropping below idle_hwm a number of consecutive sampling
5cff782560a1c3cf913ba5574a5123a299f3315emh * intervals and number of running threads in user mode are below
5cff782560a1c3cf913ba5574a5123a299f3315emh * user_lwm. We want to set the current user_lwm such that if we
5cff782560a1c3cf913ba5574a5123a299f3315emh * just switched to the next slower speed with no change in real work
5cff782560a1c3cf913ba5574a5123a299f3315emh * load, the amount of user threads at the slower speed will be such
5cff782560a1c3cf913ba5574a5123a299f3315emh * that it falls below the slower speed's user_hwm. If we didn't do
5cff782560a1c3cf913ba5574a5123a299f3315emh * that then we will just come back to the higher speed as soon as we
5cff782560a1c3cf913ba5574a5123a299f3315emh * go down even with no change in work load.
5cff782560a1c3cf913ba5574a5123a299f3315emh * The user_hwm is a fixed percentage and not calculated dynamically.
5cff782560a1c3cf913ba5574a5123a299f3315emh * We bring the CPU up if idle thread at current speed is less than
5cff782560a1c3cf913ba5574a5123a299f3315emh * the current speed idle_lwm for a number of consecutive sampling
5cff782560a1c3cf913ba5574a5123a299f3315emh * intervals or user threads are above the user_hwm for the current
5cff782560a1c3cf913ba5574a5123a299f3315emh for (i = 0; i < nspeeds; i++) {
5cff782560a1c3cf913ba5574a5123a299f3315emh cur_spd = kmem_zalloc(sizeof (cpudrv_pm_spd_t), KM_SLEEP);
5cff782560a1c3cf913ba5574a5123a299f3315emh if (i == 0) { /* normal speed */
5cff782560a1c3cf913ba5574a5123a299f3315emh /* can't speed anymore */
5cff782560a1c3cf913ba5574a5123a299f3315emh * Let's assume CPU is considered idle at full speed
5cff782560a1c3cf913ba5574a5123a299f3315emh * when it is spending I% of time in running the idle
5cff782560a1c3cf913ba5574a5123a299f3315emh * thread. At full speed, CPU will be busy (100 - I) %
5cff782560a1c3cf913ba5574a5123a299f3315emh * of times. This % of busyness increases by factor of
5cff782560a1c3cf913ba5574a5123a299f3315emh * N as CPU slows down. CPU that is idle I% of times
5cff782560a1c3cf913ba5574a5123a299f3315emh * in full speed, it is idle (100 - ((100 - I) * N)) %
5cff782560a1c3cf913ba5574a5123a299f3315emh * of times in N speed. The idle_lwm is a fixed
5cff782560a1c3cf913ba5574a5123a299f3315emh * percentage. A large value of N may result in
5cff782560a1c3cf913ba5574a5123a299f3315emh * idle_hwm to go below idle_lwm. We need to make sure
5cff782560a1c3cf913ba5574a5123a299f3315emh * that there is at least a buffer zone separation
5cff782560a1c3cf913ba5574a5123a299f3315emh * between the idle_lwm and idle_hwm values.
5cff782560a1c3cf913ba5574a5123a299f3315emh * The lwm for user threads is determined such that
5cff782560a1c3cf913ba5574a5123a299f3315emh * if CPU slows down, the load of work in the
5cff782560a1c3cf913ba5574a5123a299f3315emh * new speed would still keep the CPU at or below the
5cff782560a1c3cf913ba5574a5123a299f3315emh * user_hwm in the new speed. This is to prevent
5cff782560a1c3cf913ba5574a5123a299f3315emh * the quick jump back up to higher speed.
5cff782560a1c3cf913ba5574a5123a299f3315emh /* Slowest speed. Can't slow down anymore */
5cff782560a1c3cf913ba5574a5123a299f3315emh DPRINTF(D_PM_INIT, ("cpudrv_pm_init: instance %d: head_spd spd %d, "
5cff782560a1c3cf913ba5574a5123a299f3315emh for (cur_spd = cpupm->head_spd; cur_spd; cur_spd = cur_spd->down_spd) {
5cff782560a1c3cf913ba5574a5123a299f3315emh DPRINTF(D_PM_INIT, ("cpudrv_pm_init: instance %d: speed %d, "
5cff782560a1c3cf913ba5574a5123a299f3315emh "down_spd spd %d, idle_hwm %d, user_lwm %d, "
5cff782560a1c3cf913ba5574a5123a299f3315emh "up_spd spd %d, idle_lwm %d, user_hwm %d, "
5cff782560a1c3cf913ba5574a5123a299f3315emh#endif /* DEBUG */
5cff782560a1c3cf913ba5574a5123a299f3315emh * Free CPU power management data.
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic void
5cff782560a1c3cf913ba5574a5123a299f3315emh * Create pm-components property.
5cff782560a1c3cf913ba5574a5123a299f3315emh pmc = kmem_zalloc((cpupm->num_spd + 1) * sizeof (char *), KM_SLEEP);
5cff782560a1c3cf913ba5574a5123a299f3315emh "number of speeds exceeded limits",
5cff782560a1c3cf913ba5574a5123a299f3315emh for (i = cpupm->num_spd, cur_spd = cpupm->head_spd; i > 0;
5cff782560a1c3cf913ba5574a5123a299f3315emh "instance %d: speed exceeded limits",
5cff782560a1c3cf913ba5574a5123a299f3315emh sizeof (char *));
5cff782560a1c3cf913ba5574a5123a299f3315emh "instance %d: pm-components power level %d string '%s'\n",
5cff782560a1c3cf913ba5574a5123a299f3315emh DPRINTF(D_PM_COMP_CREATE, ("cpudrv_pm_comp_create: instance %d: "
5cff782560a1c3cf913ba5574a5123a299f3315emh "pm-components component name '%s'\n",
5cff782560a1c3cf913ba5574a5123a299f3315emh if (ddi_prop_update_string_array(DDI_DEV_T_NONE, cpudsp->dip,
5cff782560a1c3cf913ba5574a5123a299f3315emh "pm-components", pmc, cpupm->num_spd + 1) == DDI_PROP_SUCCESS) {
5cff782560a1c3cf913ba5574a5123a299f3315emh "can't create pm-components property",
5cff782560a1c3cf913ba5574a5123a299f3315emh * Mark a component idle.
5cff782560a1c3cf913ba5574a5123a299f3315emh "instance %d: pm_idle_component called\n", \
5cff782560a1c3cf913ba5574a5123a299f3315emh } else { \
5cff782560a1c3cf913ba5574a5123a299f3315emh "can't idle CPU component", \
5cff782560a1c3cf913ba5574a5123a299f3315emh * Marks a component busy in both PM framework and driver state structure.
5cff782560a1c3cf913ba5574a5123a299f3315emh "instance %d: pm_busy_component called\n", \
5cff782560a1c3cf913ba5574a5123a299f3315emh } else { \
5cff782560a1c3cf913ba5574a5123a299f3315emh "can't busy CPU component", \
5cff782560a1c3cf913ba5574a5123a299f3315emh * Marks a component busy and calls pm_raise_power().
5cff782560a1c3cf913ba5574a5123a299f3315emh#define CPUDRV_PM_MONITOR_PM_BUSY_AND_RAISE(dip, cpudsp, cpupm, new_level) { \
5cff782560a1c3cf913ba5574a5123a299f3315emh * Mark driver and PM framework busy first so framework doesn't try \
5cff782560a1c3cf913ba5574a5123a299f3315emh * to bring CPU to lower speed when we need to be at higher speed. \
5cff782560a1c3cf913ba5574a5123a299f3315emh DPRINTF(D_PM_MONITOR, ("cpudrv_pm_monitor: instance %d: " \
5cff782560a1c3cf913ba5574a5123a299f3315emh "pm_raise_power called to %d\n", ddi_get_instance((dip)), \
5cff782560a1c3cf913ba5574a5123a299f3315emh if (pm_raise_power((dip), CPUDRV_PM_COMP_NUM, (new_level)) != \
5cff782560a1c3cf913ba5574a5123a299f3315emh cmn_err(CE_WARN, "cpudrv_pm_monitor: instance %d: can't " \
5cff782560a1c3cf913ba5574a5123a299f3315emh * In order to monitor a CPU, we need to hold cpu_lock to access CPU
5cff782560a1c3cf913ba5574a5123a299f3315emh * statistics. Holding cpu_lock is not allowed from a callout routine.
5cff782560a1c3cf913ba5574a5123a299f3315emh * We dispatch a taskq to do that job.
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic void
5cff782560a1c3cf913ba5574a5123a299f3315emh * We are here because the last task has scheduled a timeout.
5cff782560a1c3cf913ba5574a5123a299f3315emh * The queue should be empty at this time.
5cff782560a1c3cf913ba5574a5123a299f3315emh if (!taskq_dispatch(cpudsp->cpudrv_pm.tq, cpudrv_pm_monitor, arg,
5cff782560a1c3cf913ba5574a5123a299f3315emh DPRINTF(D_PM_MONITOR, ("cpudrv_pm_monitor_disp: failed to "
5cff782560a1c3cf913ba5574a5123a299f3315emh "dispatch the cpudrv_pm_monitor taskq\n"));
5cff782560a1c3cf913ba5574a5123a299f3315emh * Monitors each CPU for the amount of time idle thread was running in the
5cff782560a1c3cf913ba5574a5123a299f3315emh * last quantum and arranges for the CPU to go to the lower or higher speed.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Called at the time interval appropriate for the current speed. The
5cff782560a1c3cf913ba5574a5123a299f3315emh * time interval for normal speed is CPUDRV_PM_QUANT_CNT_NORMAL. The time
5cff782560a1c3cf913ba5574a5123a299f3315emh * interval for other speeds (including unknown speed) is
5cff782560a1c3cf913ba5574a5123a299f3315emh * CPUDRV_PM_QUANT_CNT_OTHR.
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic void
5cff782560a1c3cf913ba5574a5123a299f3315emh * We assume that a CPU is initialized and has a valid cpu_t
5cff782560a1c3cf913ba5574a5123a299f3315emh * structure, if it is ready for cross calls. If this changes,
5cff782560a1c3cf913ba5574a5123a299f3315emh * additional checks might be needed.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Additionally, for x86 platforms we cannot power manage
5cff782560a1c3cf913ba5574a5123a299f3315emh * any one instance, until all instances have been initialized.
5cff782560a1c3cf913ba5574a5123a299f3315emh * That's because we don't know what the CPU domains look like
5cff782560a1c3cf913ba5574a5123a299f3315emh * until all instances have been initialized.
5cff782560a1c3cf913ba5574a5123a299f3315emh DPRINTF(D_PM_MONITOR, ("cpudrv_pm_monitor: instance %d: "
5cff782560a1c3cf913ba5574a5123a299f3315emh } else if (!(is_ready = cpudrv_pm_all_instances_ready())) {
5cff782560a1c3cf913ba5574a5123a299f3315emh DPRINTF(D_PM_MONITOR, ("cpudrv_pm_monitor: instance %d: "
5cff782560a1c3cf913ba5574a5123a299f3315emh "waiting for all CPUs to be ready\n",
5cff782560a1c3cf913ba5574a5123a299f3315emh * Make sure that we are busy so that framework doesn't
5cff782560a1c3cf913ba5574a5123a299f3315emh * try to bring us down in this situation.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Make sure that we are still not at unknown power level.
5cff782560a1c3cf913ba5574a5123a299f3315emh DPRINTF(D_PM_MONITOR, ("cpudrv_pm_monitor: instance %d: "
5cff782560a1c3cf913ba5574a5123a299f3315emh * We just changed the speed. Wait till at least next
5cff782560a1c3cf913ba5574a5123a299f3315emh * call to this routine before proceeding ahead.
5cff782560a1c3cf913ba5574a5123a299f3315emh cmn_err(CE_WARN, "cpudrv_pm_monitor: instance %d: can't get "
5cff782560a1c3cf913ba5574a5123a299f3315emh * We can't do anything when we have just switched to a state
5cff782560a1c3cf913ba5574a5123a299f3315emh * because there is no valid timestamp.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Various watermarks are based on this routine being called back
5cff782560a1c3cf913ba5574a5123a299f3315emh * exactly at the requested period. This is not guaranteed
5cff782560a1c3cf913ba5574a5123a299f3315emh * because this routine is called from a taskq that is dispatched
5cff782560a1c3cf913ba5574a5123a299f3315emh * from a timeout routine. Handle this by finding out how many
5cff782560a1c3cf913ba5574a5123a299f3315emh * ticks have elapsed since the last call (lbolt_cnt) and adjusting
5cff782560a1c3cf913ba5574a5123a299f3315emh * the idle_cnt based on the delay added to the requested period
5cff782560a1c3cf913ba5574a5123a299f3315emh * by timeout and taskq.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Time taken between recording the current counts and
5cff782560a1c3cf913ba5574a5123a299f3315emh * arranging the next call of this routine is an error in our
5cff782560a1c3cf913ba5574a5123a299f3315emh * calculation. We minimize the error by calling
5cff782560a1c3cf913ba5574a5123a299f3315emh * CPUDRV_PM_MONITOR_INIT() here instead of end of this routine.
5cff782560a1c3cf913ba5574a5123a299f3315emh DPRINTF(D_PM_MONITOR_VERBOSE, ("cpudrv_pm_monitor: instance %d: "
5cff782560a1c3cf913ba5574a5123a299f3315emh "idle count %d, user count %d, system count %d, pm_level %d, "
5cff782560a1c3cf913ba5574a5123a299f3315emh "pm_busycnt %d\n", ddi_get_instance(dip), idle_cnt, user_cnt,
5cff782560a1c3cf913ba5574a5123a299f3315emh * Notify that timeout and taskq have caused delays and we need to
5cff782560a1c3cf913ba5574a5123a299f3315emh * scale our parameters accordingly.
5cff782560a1c3cf913ba5574a5123a299f3315emh * To get accurate result, don't turn on other DPRINTFs with
5cff782560a1c3cf913ba5574a5123a299f3315emh * the following DPRINTF. PROM calls generated by other
5cff782560a1c3cf913ba5574a5123a299f3315emh * DPRINTFs change the timing.
5cff782560a1c3cf913ba5574a5123a299f3315emh DPRINTF(D_PM_MONITOR_DELAY, ("cpudrv_pm_monitor: instance %d: "
5cff782560a1c3cf913ba5574a5123a299f3315emh "lbolt count %ld > quantum_count %u\n",
5cff782560a1c3cf913ba5574a5123a299f3315emh#endif /* DEBUG */
5cff782560a1c3cf913ba5574a5123a299f3315emh * Adjust counts based on the delay added by timeout and taskq.
5cff782560a1c3cf913ba5574a5123a299f3315emh if ((user_cnt > cur_spd->user_hwm) || (idle_cnt < cur_spd->idle_lwm &&
5cff782560a1c3cf913ba5574a5123a299f3315emh cur_spd->idle_blwm_cnt >= cpudrv_pm_idle_blwm_cnt_max)) {
5cff782560a1c3cf913ba5574a5123a299f3315emh * In normal situation, arrange to go to next higher speed.
5cff782560a1c3cf913ba5574a5123a299f3315emh * If we are running in special direct pm mode, we just stay
5cff782560a1c3cf913ba5574a5123a299f3315emh * at the current speed.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Arrange to go to next lower speed by informing our idle
5cff782560a1c3cf913ba5574a5123a299f3315emh * status to the power management framework.
5cff782560a1c3cf913ba5574a5123a299f3315emh * If we are between the idle water marks and have not
5cff782560a1c3cf913ba5574a5123a299f3315emh * been here enough consecutive times to be considered
5cff782560a1c3cf913ba5574a5123a299f3315emh * busy, just increment the count and return.
5cff782560a1c3cf913ba5574a5123a299f3315emh (cur_spd->idle_bhwm_cnt < cpudrv_pm_idle_bhwm_cnt_max)) {
5cff782560a1c3cf913ba5574a5123a299f3315emh * Arranges to stay at the current speed.