cpudrv.c revision 67bdf3b0f9c03ddb09508476025689fb2ca68f45
5cff782560a1c3cf913ba5574a5123a299f3315emh * CDDL HEADER START
5cff782560a1c3cf913ba5574a5123a299f3315emh * The contents of this file are subject to the terms of the
5cff782560a1c3cf913ba5574a5123a299f3315emh * Common Development and Distribution License (the "License").
5cff782560a1c3cf913ba5574a5123a299f3315emh * You may not use this file except in compliance with the License.
5cff782560a1c3cf913ba5574a5123a299f3315emh * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
5cff782560a1c3cf913ba5574a5123a299f3315emh * See the License for the specific language governing permissions
5cff782560a1c3cf913ba5574a5123a299f3315emh * and limitations under the License.
5cff782560a1c3cf913ba5574a5123a299f3315emh * When distributing Covered Code, include this CDDL HEADER in each
5cff782560a1c3cf913ba5574a5123a299f3315emh * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
5cff782560a1c3cf913ba5574a5123a299f3315emh * If applicable, add the following below this CDDL HEADER, with the
5cff782560a1c3cf913ba5574a5123a299f3315emh * fields enclosed by brackets "[]" replaced with your own identifying
5cff782560a1c3cf913ba5574a5123a299f3315emh * information: Portions Copyright [yyyy] [name of copyright owner]
5cff782560a1c3cf913ba5574a5123a299f3315emh * CDDL HEADER END
fcddbe1ff917b2a8770cd3575f46e72601a06df6Mark Haywood * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Use is subject to license terms.
5cff782560a1c3cf913ba5574a5123a299f3315emh * CPU Device driver. The driver is not DDI-compliant.
5cff782560a1c3cf913ba5574a5123a299f3315emh * The driver supports the following features:
5cff782560a1c3cf913ba5574a5123a299f3315emh * - Power management.
5cff782560a1c3cf913ba5574a5123a299f3315emh * CPU power management
5cff782560a1c3cf913ba5574a5123a299f3315emh * The supported power saving model is to slow down the CPU (on SPARC by
5cff782560a1c3cf913ba5574a5123a299f3315emh * dividing the CPU clock and on x86 by dropping down a P-state).
5cff782560a1c3cf913ba5574a5123a299f3315emh * Periodically we determine the amount of time the CPU is running
5cff782560a1c3cf913ba5574a5123a299f3315emh * the idle thread and threads in user mode during the last quantum. If the idle
5cff782560a1c3cf913ba5574a5123a299f3315emh * thread was running less than its low water mark for current speed for
5cff782560a1c3cf913ba5574a5123a299f3315emh * a number of consecutive sampling periods, or the number of running threads in
5cff782560a1c3cf913ba5574a5123a299f3315emh * user mode is above its high water mark, we arrange to go to the higher
5cff782560a1c3cf913ba5574a5123a299f3315emh * speed. If the idle thread was running more than its high water mark without
5cff782560a1c3cf913ba5574a5123a299f3315emh * dropping a number of consecutive times below the mark, and the number of threads
5cff782560a1c3cf913ba5574a5123a299f3315emh * running in user mode is below its low water mark, we arrange to go to the
5cff782560a1c3cf913ba5574a5123a299f3315emh * next lower speed. While going down, we go through all the speeds. While
5cff782560a1c3cf913ba5574a5123a299f3315emh * going up we go to the maximum speed to minimize impact on the user, but have
5cff782560a1c3cf913ba5574a5123a299f3315emh * provisions in the driver to go to other speeds.
5cff782560a1c3cf913ba5574a5123a299f3315emh * The driver does not have knowledge of a particular implementation of this
5cff782560a1c3cf913ba5574a5123a299f3315emh * scheme and will work with all CPUs supporting this model. On SPARC, the
5cff782560a1c3cf913ba5574a5123a299f3315emh * driver determines supported speeds by looking at 'clock-divisors' property
5cff782560a1c3cf913ba5574a5123a299f3315emh * created by OBP. On x86, the driver retrieves the supported speeds from
5cff782560a1c3cf913ba5574a5123a299f3315emh * Configuration function prototypes and data structures
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic int cpudrv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic int cpudrv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic int cpudrv_power(dev_info_t *dip, int comp, int level);
5cff782560a1c3cf913ba5574a5123a299f3315emh 0, /* refcnt */
5cff782560a1c3cf913ba5574a5123a299f3315emh * Function prototypes
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxestatic void cpudrv_free(cpudrv_devstate_t *cpudsp);
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxestatic int cpudrv_comp_create(cpudrv_devstate_t *cpudsp);
5cff782560a1c3cf913ba5574a5123a299f3315emh * Driver global variables
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxestatic uint_t cpudrv_idle_buf_zone = CPUDRV_IDLE_BUF_ZONE;
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxestatic uint_t cpudrv_idle_bhwm_cnt_max = CPUDRV_IDLE_BHWM_CNT_MAX;
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxestatic uint_t cpudrv_idle_blwm_cnt_max = CPUDRV_IDLE_BLWM_CNT_MAX;
5cff782560a1c3cf913ba5574a5123a299f3315emh * cpudrv_direct_pm allows user applications to directly control the
5cff782560a1c3cf913ba5574a5123a299f3315emh * power state transitions (direct pm) without following the normal
5cff782560a1c3cf913ba5574a5123a299f3315emh * direct pm protocol. This is needed because the normal protocol
5cff782560a1c3cf913ba5574a5123a299f3315emh * requires that a device only be lowered when it is idle, and be
5cff782560a1c3cf913ba5574a5123a299f3315emh * brought up when it requests to do so by calling pm_raise_power().
5cff782560a1c3cf913ba5574a5123a299f3315emh * Ignoring this protocol is harmless for CPU (other than speed).
5cff782560a1c3cf913ba5574a5123a299f3315emh * Moreover it might be the case that CPU is never idle or wants
5cff782560a1c3cf913ba5574a5123a299f3315emh * to be at higher speed because of the additional CPU cycles required
5cff782560a1c3cf913ba5574a5123a299f3315emh * to run the user application.
5cff782560a1c3cf913ba5574a5123a299f3315emh * The driver will still report idle/busy status to the framework. Although
5cff782560a1c3cf913ba5574a5123a299f3315emh * framework will ignore this information for direct pm devices and not
5cff782560a1c3cf913ba5574a5123a299f3315emh * try to bring them down when idle, user applications can still use this
5cff782560a1c3cf913ba5574a5123a299f3315emh * information if they want.
5cff782560a1c3cf913ba5574a5123a299f3315emh * In the future, provide an ioctl to control setting of this mode. In
5cff782560a1c3cf913ba5574a5123a299f3315emh * that case, this variable should move to the state structure and
5cff782560a1c3cf913ba5574a5123a299f3315emh * be protected by the lock in the state structure.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Arranges for the handler function to be called at the interval suitable
5cff782560a1c3cf913ba5574a5123a299f3315emh * for current speed.
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood (cpudsp), (((cpudsp)->cpudrv_pm.cur_spd == NULL) ? \
5cff782560a1c3cf913ba5574a5123a299f3315emh * Arranges for the handler function not to be called back.
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood if (tmp_tid != 0) { \
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood mutex_enter(&(cpudsp)->cpudrv_pm.timeout_lock); \
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood while ((cpudsp)->cpudrv_pm.timeout_count != 0) \
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood mutex_exit(&(cpudsp)->cpudrv_pm.timeout_lock); \
5cff782560a1c3cf913ba5574a5123a299f3315emh sizeof (cpudrv_devstate_t), 0)) != 0) {
5cff782560a1c3cf913ba5574a5123a299f3315emh * Callbacks used by the PPM driver.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Driver attach(9e) entry point.
5cff782560a1c3cf913ba5574a5123a299f3315emh switch (cmd) {
5cff782560a1c3cf913ba5574a5123a299f3315emh if ((cpudsp = ddi_get_soft_state(cpudrv_state, instance)) ==
5cff782560a1c3cf913ba5574a5123a299f3315emh * Find CPU number for this dev_info node.
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood mutex_init(&cpudsp->lock, NULL, MUTEX_DRIVER, NULL);
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood * Taskq is used to dispatch routine to monitor CPU
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood * activities.
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe "cpudrv_monitor",
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood mutex_init(&cpudsp->cpudrv_pm.timeout_lock, NULL,
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood * Driver needs to assume that CPU is running at
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood * unknown speed at DDI_ATTACH and switch it to the
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood * needed speed. We assume that initial needed speed
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood * is full speed for us.
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe * We need to take the lock because cpudrv_monitor()
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood * will start running in parallel with attach().
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood * We don't call pm_raise_power() directly from attach
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood * because driver attach for a slave CPU node can
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood * happen before the CPU is even initialized. We just
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood * start the monitoring system which understands
173531301317dd4f31e83d4785873141e984ab86Mark Haywood * unknown speed and moves CPU to top speed when it
173531301317dd4f31e83d4785873141e984ab86Mark Haywood * has been initialized.
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood cpudsp = ddi_get_soft_state(cpudrv_state, instance);
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood * Nothing to do for resume, if not doing active PM.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Driver needs to assume that CPU is running at unknown speed
5cff782560a1c3cf913ba5574a5123a299f3315emh * at DDI_RESUME and switch it to the needed speed. We assume
5cff782560a1c3cf913ba5574a5123a299f3315emh * that the needed speed is full speed for us.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Driver detach(9e) entry point.
5cff782560a1c3cf913ba5574a5123a299f3315emh switch (cmd) {
5cff782560a1c3cf913ba5574a5123a299f3315emh * If the only thing supported by the driver is power
5cff782560a1c3cf913ba5574a5123a299f3315emh * management, we can in future enhance the driver and
5cff782560a1c3cf913ba5574a5123a299f3315emh * framework that loads it to unload the driver when
5cff782560a1c3cf913ba5574a5123a299f3315emh * user has disabled CPU power management.
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood cpudsp = ddi_get_soft_state(cpudrv_state, instance);
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood * Nothing to do for suspend, if not doing active PM.
5cff782560a1c3cf913ba5574a5123a299f3315emh * During a checkpoint-resume sequence, framework will
5cff782560a1c3cf913ba5574a5123a299f3315emh * stop interrupts to quiesce kernel activity. This will
5cff782560a1c3cf913ba5574a5123a299f3315emh * leave our monitoring system ineffective. Handle this
5cff782560a1c3cf913ba5574a5123a299f3315emh * by stopping our monitoring system and bringing CPU
5cff782560a1c3cf913ba5574a5123a299f3315emh * to full speed. In case we are in special direct pm
5cff782560a1c3cf913ba5574a5123a299f3315emh * mode, we leave the CPU at whatever speed it is. This
5cff782560a1c3cf913ba5574a5123a299f3315emh * is harmless other than speed.
5cff782560a1c3cf913ba5574a5123a299f3315emh DPRINTF(D_DETACH, ("cpudrv_detach: instance %d: DDI_SUSPEND - "
5cff782560a1c3cf913ba5574a5123a299f3315emh "instance %d: can't busy CPU "
173531301317dd4f31e83d4785873141e984ab86Mark Haywood "can't raise CPU power level to %d",
5cff782560a1c3cf913ba5574a5123a299f3315emh * Driver power(9e) entry point.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Driver's notion of current power is set *only* in power(9e) entry point
5cff782560a1c3cf913ba5574a5123a299f3315emh * after actual power change operation has been successfully completed.
5cff782560a1c3cf913ba5574a5123a299f3315emh/* ARGSUSED */
5cff782560a1c3cf913ba5574a5123a299f3315emh DPRINTF(D_POWER, ("cpudrv_power: instance %d: level %d\n",
5cff782560a1c3cf913ba5574a5123a299f3315emh if ((cpudsp = ddi_get_soft_state(cpudrv_state, instance)) == NULL) {
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe cmn_err(CE_WARN, "cpudrv_power: instance %d: can't "
5cff782560a1c3cf913ba5574a5123a299f3315emh * In normal operation, we fail if we are busy and request is
5cff782560a1c3cf913ba5574a5123a299f3315emh * to lower the power level. We let this go through if the driver
5cff782560a1c3cf913ba5574a5123a299f3315emh * is in special direct pm mode. On x86, we also let this through
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood * if the change is due to a request to govern the max speed.
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe if (!cpudrv_direct_pm && (cpudrvpm->pm_busycnt >= 1) &&
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe for (new_spd = cpudrvpm->head_spd; new_spd; new_spd =
5cff782560a1c3cf913ba5574a5123a299f3315emh * We currently refuse to power manage if the CPU is not ready to
5cff782560a1c3cf913ba5574a5123a299f3315emh * take cross calls (cross calls fail silently if CPU is not ready
5cff782560a1c3cf913ba5574a5123a299f3315emh * for it).
5cff782560a1c3cf913ba5574a5123a299f3315emh * Additionally, for x86 platforms we cannot power manage
5cff782560a1c3cf913ba5574a5123a299f3315emh * any one instance, until all instances have been initialized.
5cff782560a1c3cf913ba5574a5123a299f3315emh * That's because we don't know what the CPU domains look like
5cff782560a1c3cf913ba5574a5123a299f3315emh * until all instances have been initialized.
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe "waiting for all CPUs to be power manageable\n",
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe * Execute CPU specific routine on the requested CPU to
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe * change its speed to normal-speed/divisor.
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe if ((ret = cpudrv_change_speed(cpudsp, new_spd)) != DDI_SUCCESS) {
5cff782560a1c3cf913ba5574a5123a299f3315emh * Reset idle threshold time for the new power level.
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe "can't idle CPU component",
5cff782560a1c3cf913ba5574a5123a299f3315emh * Reset various parameters because we are now running at new speed.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Initialize power management data.
5cff782560a1c3cf913ba5574a5123a299f3315emh /* Need at least two speeds to power manage */
5cff782560a1c3cf913ba5574a5123a299f3315emh * Calculate the watermarks and other parameters based on the
5cff782560a1c3cf913ba5574a5123a299f3315emh * supplied speeds.
5cff782560a1c3cf913ba5574a5123a299f3315emh * One of the basic assumption is that for X amount of CPU work,
5cff782560a1c3cf913ba5574a5123a299f3315emh * if CPU is slowed down by a factor of N, the time it takes to
5cff782560a1c3cf913ba5574a5123a299f3315emh * do the same work will be N * X.
5cff782560a1c3cf913ba5574a5123a299f3315emh * The driver declares that a CPU is idle and ready to be slowed down,
5cff782560a1c3cf913ba5574a5123a299f3315emh * if amount of idle thread is more than the current speed idle_hwm
5cff782560a1c3cf913ba5574a5123a299f3315emh * without dropping below idle_hwm a number of consecutive sampling
5cff782560a1c3cf913ba5574a5123a299f3315emh * intervals and number of running threads in user mode are below
5cff782560a1c3cf913ba5574a5123a299f3315emh * user_lwm. We want to set the current user_lwm such that if we
5cff782560a1c3cf913ba5574a5123a299f3315emh * just switched to the next slower speed with no change in real work
5cff782560a1c3cf913ba5574a5123a299f3315emh * load, the amount of user threads at the slower speed will be such
5cff782560a1c3cf913ba5574a5123a299f3315emh * that it falls below the slower speed's user_hwm. If we didn't do
5cff782560a1c3cf913ba5574a5123a299f3315emh * that then we will just come back to the higher speed as soon as we
5cff782560a1c3cf913ba5574a5123a299f3315emh * go down even with no change in work load.
5cff782560a1c3cf913ba5574a5123a299f3315emh * The user_hwm is a fixed percentage and not calculated dynamically.
5cff782560a1c3cf913ba5574a5123a299f3315emh * We bring the CPU up if idle thread at current speed is less than
5cff782560a1c3cf913ba5574a5123a299f3315emh * the current speed idle_lwm for a number of consecutive sampling
5cff782560a1c3cf913ba5574a5123a299f3315emh * intervals or user threads are above the user_hwm for the current
5cff782560a1c3cf913ba5574a5123a299f3315emh for (i = 0; i < nspeeds; i++) {
5cff782560a1c3cf913ba5574a5123a299f3315emh cur_spd = kmem_zalloc(sizeof (cpudrv_pm_spd_t), KM_SLEEP);
5cff782560a1c3cf913ba5574a5123a299f3315emh if (i == 0) { /* normal speed */
5cff782560a1c3cf913ba5574a5123a299f3315emh /* can't speed anymore */
5cff782560a1c3cf913ba5574a5123a299f3315emh * Let's assume CPU is considered idle at full speed
5cff782560a1c3cf913ba5574a5123a299f3315emh * when it is spending I% of time in running the idle
5cff782560a1c3cf913ba5574a5123a299f3315emh * thread. At full speed, CPU will be busy (100 - I) %
5cff782560a1c3cf913ba5574a5123a299f3315emh * of the time. This % of busyness increases by factor of
5cff782560a1c3cf913ba5574a5123a299f3315emh * N as CPU slows down. A CPU that is idle I% of the time
5cff782560a1c3cf913ba5574a5123a299f3315emh * at full speed is idle (100 - ((100 - I) * N)) % of the
5cff782560a1c3cf913ba5574a5123a299f3315emh * time when slowed down by a factor of N. The idle_lwm is a fixed
5cff782560a1c3cf913ba5574a5123a299f3315emh * percentage. A large value of N may result in
5cff782560a1c3cf913ba5574a5123a299f3315emh * idle_hwm to go below idle_lwm. We need to make sure
5cff782560a1c3cf913ba5574a5123a299f3315emh * that there is at least a buffer zone separation
5cff782560a1c3cf913ba5574a5123a299f3315emh * between the idle_lwm and idle_hwm values.
5cff782560a1c3cf913ba5574a5123a299f3315emh * The lwm for user threads is determined such that
5cff782560a1c3cf913ba5574a5123a299f3315emh * if CPU slows down, the load of work in the
5cff782560a1c3cf913ba5574a5123a299f3315emh * new speed would still keep the CPU at or below the
5cff782560a1c3cf913ba5574a5123a299f3315emh * user_hwm in the new speed. This is to prevent
5cff782560a1c3cf913ba5574a5123a299f3315emh * the quick jump back up to higher speed.
5cff782560a1c3cf913ba5574a5123a299f3315emh /* Slowest speed. Can't slow down anymore */
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe DPRINTF(D_PM_INIT, ("cpudrv_init: instance %d: head_spd spd %d, "
5cff782560a1c3cf913ba5574a5123a299f3315emh for (cur_spd = cpupm->head_spd; cur_spd; cur_spd = cur_spd->down_spd) {
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe DPRINTF(D_PM_INIT, ("cpudrv_init: instance %d: speed %d, "
5cff782560a1c3cf913ba5574a5123a299f3315emh "down_spd spd %d, idle_hwm %d, user_lwm %d, "
5cff782560a1c3cf913ba5574a5123a299f3315emh "up_spd spd %d, idle_lwm %d, user_hwm %d, "
5cff782560a1c3cf913ba5574a5123a299f3315emh#endif /* DEBUG */
5cff782560a1c3cf913ba5574a5123a299f3315emh * Free CPU power management data.
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic void
5cff782560a1c3cf913ba5574a5123a299f3315emh * Create pm-components property.
5cff782560a1c3cf913ba5574a5123a299f3315emh pmc = kmem_zalloc((cpupm->num_spd + 1) * sizeof (char *), KM_SLEEP);
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe cmn_err(CE_WARN, "cpudrv_comp_create: instance %d: "
5cff782560a1c3cf913ba5574a5123a299f3315emh "number of speeds exceeded limits",
5cff782560a1c3cf913ba5574a5123a299f3315emh for (i = cpupm->num_spd, cur_spd = cpupm->head_spd; i > 0;
5cff782560a1c3cf913ba5574a5123a299f3315emh "instance %d: speed exceeded limits",
5cff782560a1c3cf913ba5574a5123a299f3315emh sizeof (char *));
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe CPUDRV_COMP_SPRINT(pmc[i], cpupm, cur_spd, comp_spd)
5cff782560a1c3cf913ba5574a5123a299f3315emh "instance %d: pm-components power level %d string '%s'\n",
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe DPRINTF(D_PM_COMP_CREATE, ("cpudrv_comp_create: instance %d: "
5cff782560a1c3cf913ba5574a5123a299f3315emh "pm-components component name '%s'\n",
5cff782560a1c3cf913ba5574a5123a299f3315emh if (ddi_prop_update_string_array(DDI_DEV_T_NONE, cpudsp->dip,
5cff782560a1c3cf913ba5574a5123a299f3315emh "pm-components", pmc, cpupm->num_spd + 1) == DDI_PROP_SUCCESS) {
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe cmn_err(CE_WARN, "cpudrv_comp_create: instance %d: "
5cff782560a1c3cf913ba5574a5123a299f3315emh "can't create pm-components property",
5cff782560a1c3cf913ba5574a5123a299f3315emh * Mark a component idle.
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe#define CPUDRV_MONITOR_PM_IDLE_COMP(dip, cpupm) { \
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe if (pm_idle_component((dip), CPUDRV_COMP_NUM) == \
5cff782560a1c3cf913ba5574a5123a299f3315emh "instance %d: pm_idle_component called\n", \
5cff782560a1c3cf913ba5574a5123a299f3315emh } else { \
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe cmn_err(CE_WARN, "cpudrv_monitor: instance %d: " \
5cff782560a1c3cf913ba5574a5123a299f3315emh "can't idle CPU component", \
5cff782560a1c3cf913ba5574a5123a299f3315emh * Marks a component busy in both PM framework and driver state structure.
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe#define CPUDRV_MONITOR_PM_BUSY_COMP(dip, cpupm) { \
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe if (pm_busy_component((dip), CPUDRV_COMP_NUM) == \
5cff782560a1c3cf913ba5574a5123a299f3315emh "instance %d: pm_busy_component called\n", \
5cff782560a1c3cf913ba5574a5123a299f3315emh } else { \
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe cmn_err(CE_WARN, "cpudrv_monitor: instance %d: " \
5cff782560a1c3cf913ba5574a5123a299f3315emh "can't busy CPU component", \
5cff782560a1c3cf913ba5574a5123a299f3315emh * Marks a component busy and calls pm_raise_power().
67bdf3b0f9c03ddb09508476025689fb2ca68f45Mark Haywood#define CPUDRV_MONITOR_PM_BUSY_AND_RAISE(dip, cpudsp, cpupm, new_spd) { \
5cff782560a1c3cf913ba5574a5123a299f3315emh * Mark driver and PM framework busy first so framework doesn't try \
5cff782560a1c3cf913ba5574a5123a299f3315emh * to bring CPU to lower speed when we need to be at higher speed. \
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe DPRINTF(D_PM_MONITOR, ("cpudrv_monitor: instance %d: " \
5cff782560a1c3cf913ba5574a5123a299f3315emh "pm_raise_power called to %d\n", ddi_get_instance((dip)), \
67bdf3b0f9c03ddb09508476025689fb2ca68f45Mark Haywood ret = pm_raise_power((dip), CPUDRV_COMP_NUM, (new_spd->pm_level)); \
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe cmn_err(CE_WARN, "cpudrv_monitor: instance %d: can't " \
67bdf3b0f9c03ddb09508476025689fb2ca68f45Mark Haywood if (ret == DDI_SUCCESS && cpudsp->cpudrv_pm.cur_spd == NULL) { \
5cff782560a1c3cf913ba5574a5123a299f3315emh * In order to monitor a CPU, we need to hold cpu_lock to access CPU
5cff782560a1c3cf913ba5574a5123a299f3315emh * statistics. Holding cpu_lock is not allowed from a callout routine.
5cff782560a1c3cf913ba5574a5123a299f3315emh * We dispatch a taskq to do that job.
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic void
5cff782560a1c3cf913ba5574a5123a299f3315emh * We are here because the last task has scheduled a timeout.
5cff782560a1c3cf913ba5574a5123a299f3315emh * The queue should be empty at this time.
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe if (!taskq_dispatch(cpudsp->cpudrv_pm.tq, cpudrv_monitor, arg,
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe DPRINTF(D_PM_MONITOR, ("cpudrv_monitor_disp: failed to "
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe "dispatch the cpudrv_monitor taskq\n"));
5cff782560a1c3cf913ba5574a5123a299f3315emh * Monitors each CPU for the amount of time idle thread was running in the
5cff782560a1c3cf913ba5574a5123a299f3315emh * last quantum and arranges for the CPU to go to the lower or higher speed.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Called at the time interval appropriate for the current speed. The
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe * time interval for normal speed is CPUDRV_QUANT_CNT_NORMAL. The time
5cff782560a1c3cf913ba5574a5123a299f3315emh * interval for other speeds (including unknown speed) is
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe * CPUDRV_QUANT_CNT_OTHR.
5cff782560a1c3cf913ba5574a5123a299f3315emhstatic void
5cff782560a1c3cf913ba5574a5123a299f3315emh * We assume that a CPU is initialized and has a valid cpu_t
5cff782560a1c3cf913ba5574a5123a299f3315emh * structure, if it is ready for cross calls. If this changes,
5cff782560a1c3cf913ba5574a5123a299f3315emh * additional checks might be needed.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Additionally, for x86 platforms we cannot power manage
5cff782560a1c3cf913ba5574a5123a299f3315emh * any one instance, until all instances have been initialized.
5cff782560a1c3cf913ba5574a5123a299f3315emh * That's because we don't know what the CPU domains look like
5cff782560a1c3cf913ba5574a5123a299f3315emh * until all instances have been initialized.
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe DPRINTF(D_PM_MONITOR, ("cpudrv_monitor: instance %d: "
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe DPRINTF(D_PM_MONITOR, ("cpudrv_monitor: instance %d: "
7f606acec863be28b51fb0f694ca86b41ca76e6dMark Haywood "waiting for all CPUs to be power manageable\n",
5cff782560a1c3cf913ba5574a5123a299f3315emh * Make sure that we are busy so that framework doesn't
5cff782560a1c3cf913ba5574a5123a299f3315emh * try to bring us down in this situation.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Make sure that we are still not at unknown power level.
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe DPRINTF(D_PM_MONITOR, ("cpudrv_monitor: instance %d: "
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe CPUDRV_MONITOR_PM_BUSY_AND_RAISE(dip, cpudsp, cpupm,
5cff782560a1c3cf913ba5574a5123a299f3315emh * We just changed the speed. Wait till at least next
5cff782560a1c3cf913ba5574a5123a299f3315emh * call to this routine before proceeding ahead.
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe cmn_err(CE_WARN, "cpudrv_monitor: instance %d: can't get "
5cff782560a1c3cf913ba5574a5123a299f3315emh * We can't do anything when we have just switched to a state
5cff782560a1c3cf913ba5574a5123a299f3315emh * because there is no valid timestamp.
fcddbe1ff917b2a8770cd3575f46e72601a06df6Mark Haywood cpupm->lastquan_ticks = NSEC_TO_TICK(gethrtime());
5cff782560a1c3cf913ba5574a5123a299f3315emh * Various watermarks are based on this routine being called back
5cff782560a1c3cf913ba5574a5123a299f3315emh * exactly at the requested period. This is not guaranteed
5cff782560a1c3cf913ba5574a5123a299f3315emh * because this routine is called from a taskq that is dispatched
5cff782560a1c3cf913ba5574a5123a299f3315emh * from a timeout routine. Handle this by finding out how many
fcddbe1ff917b2a8770cd3575f46e72601a06df6Mark Haywood * ticks have elapsed since the last call and adjusting
5cff782560a1c3cf913ba5574a5123a299f3315emh * the idle_cnt based on the delay added to the requested period
5cff782560a1c3cf913ba5574a5123a299f3315emh * by timeout and taskq.
5cff782560a1c3cf913ba5574a5123a299f3315emh * Time taken between recording the current counts and
5cff782560a1c3cf913ba5574a5123a299f3315emh * arranging the next call of this routine is an error in our
5cff782560a1c3cf913ba5574a5123a299f3315emh * calculation. We minimize the error by calling
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe * CPUDRV_MONITOR_INIT() here instead of end of this routine.
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe DPRINTF(D_PM_MONITOR_VERBOSE, ("cpudrv_monitor: instance %d: "
5cff782560a1c3cf913ba5574a5123a299f3315emh "idle count %d, user count %d, system count %d, pm_level %d, "
5cff782560a1c3cf913ba5574a5123a299f3315emh "pm_busycnt %d\n", ddi_get_instance(dip), idle_cnt, user_cnt,
5cff782560a1c3cf913ba5574a5123a299f3315emh * Notify that timeout and taskq have caused delays and we need to
5cff782560a1c3cf913ba5574a5123a299f3315emh * scale our parameters accordingly.
5cff782560a1c3cf913ba5574a5123a299f3315emh * To get accurate result, don't turn on other DPRINTFs with
5cff782560a1c3cf913ba5574a5123a299f3315emh * the following DPRINTF. PROM calls generated by other
5cff782560a1c3cf913ba5574a5123a299f3315emh * DPRINTFs change the timing.
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe DPRINTF(D_PM_MONITOR_DELAY, ("cpudrv_monitor: instance %d: "
fcddbe1ff917b2a8770cd3575f46e72601a06df6Mark Haywood "tick count %d > quantum_count %u\n",
fcddbe1ff917b2a8770cd3575f46e72601a06df6Mark Haywood ddi_get_instance(dip), tick_cnt, cur_spd->quant_cnt));
5cff782560a1c3cf913ba5574a5123a299f3315emh#endif /* DEBUG */
5cff782560a1c3cf913ba5574a5123a299f3315emh * Adjust counts based on the delay added by timeout and taskq.
fcddbe1ff917b2a8770cd3575f46e72601a06df6Mark Haywood idle_cnt = (idle_cnt * cur_spd->quant_cnt) / tick_cnt;
fcddbe1ff917b2a8770cd3575f46e72601a06df6Mark Haywood user_cnt = (user_cnt * cur_spd->quant_cnt) / tick_cnt;
5cff782560a1c3cf913ba5574a5123a299f3315emh if ((user_cnt > cur_spd->user_hwm) || (idle_cnt < cur_spd->idle_lwm &&
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe cur_spd->idle_blwm_cnt >= cpudrv_idle_blwm_cnt_max)) {
5cff782560a1c3cf913ba5574a5123a299f3315emh * In normal situation, arrange to go to next higher speed.
5cff782560a1c3cf913ba5574a5123a299f3315emh * If we are running in special direct pm mode, we just stay
5cff782560a1c3cf913ba5574a5123a299f3315emh * at the current speed.
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe CPUDRV_MONITOR_PM_BUSY_AND_RAISE(dip, cpudsp, cpupm,
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe (idle_cnt >= cur_spd->idle_hwm) || !CPU_ACTIVE(cpudsp->cp)) {
5cff782560a1c3cf913ba5574a5123a299f3315emh * Arrange to go to next lower speed by informing our idle
5cff782560a1c3cf913ba5574a5123a299f3315emh * status to the power management framework.
5cff782560a1c3cf913ba5574a5123a299f3315emh * If we are between the idle water marks and have not
5cff782560a1c3cf913ba5574a5123a299f3315emh * been here enough consecutive times to be considered
5cff782560a1c3cf913ba5574a5123a299f3315emh * busy, just increment the count and return.
0e7515250c8395f368aa45fb9acae7c4f8f8b786Eric Saxe (cur_spd->idle_bhwm_cnt < cpudrv_idle_bhwm_cnt_max)) {
5cff782560a1c3cf913ba5574a5123a299f3315emh * Arranges to stay at the current speed.