common/sys/cpudrv.h

	cpudrv.h revision fcddbe1ff917b2a8770cd3575f46e72601a06df6
1137N/A/*
1137N/A * CDDL HEADER START
1137N/A *
1137N/A * The contents of this file are subject to the terms of the
1137N/A * Common Development and Distribution License (the "License").
1137N/A * You may not use this file except in compliance with the License.
1137N/A *
1137N/A * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
1137N/A * or http://www.opensolaris.org/os/licensing.
1137N/A * See the License for the specific language governing permissions
1137N/A * and limitations under the License.
1137N/A *
1137N/A * When distributing Covered Code, include this CDDL HEADER in each
1137N/A * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1137N/A * If applicable, add the following below this CDDL HEADER, with the
1137N/A * fields enclosed by brackets "[]" replaced with your own identifying
1137N/A * information: Portions Copyright [yyyy] [name of copyright owner]
1137N/A *
1137N/A * CDDL HEADER END
1137N/A */
1137N/A/*
1137N/A * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
1137N/A * Use is subject to license terms.
1137N/A */
1137N/A
1137N/A#ifndef _SYS_CPUDRV_H
1137N/A#define _SYS_CPUDRV_H
1137N/A
1137N/A#include <sys/promif.h>
1137N/A#include <sys/cpuvar.h>
1137N/A#include <sys/taskq.h>
1137N/A
1137N/A#ifdef  __cplusplus
1137N/Aextern "C" {
1137N/A#endif
1137N/A
1137N/A#ifdef _KERNEL
1137N/A
1137N/A/*
1137N/A * CPU power management data
1137N/A */
1137N/A/*
1137N/A * Data related to a particular speed.
 *
 * All per speed data nodes for a CPU are linked together using down_spd.
 * The link list is ordered with first node containing data for
 * normal (maximum) speed. up_spd points to the next speed up. Currently
 * all up_spd's point to the normal speed but this can be changed in future.
 * quant_cnt is the number of ticks when monitoring system will be called
 * next. There are different quant_cnt for different speeds.
 *
 * Note that 'speed' has different meaning depending upon the platform.
 * On SPARC, the speed is really a divisor of the maximum speed (e.g., a speed
 * of 2 means that it's 1/2 the maximum speed). On x86, speed is a processor
 * frequency.
 */
typedef struct cpudrv_pm_spd {
    uint_t          speed;      /* platform dependent notion */
    uint_t          quant_cnt;  /* quantum count in ticks */
    struct cpudrv_pm_spd    *down_spd;  /* ptr to next speed down */
    struct cpudrv_pm_spd    *up_spd;    /* ptr to next speed up */
    uint_t          idle_hwm;   /* down if idle thread >= hwm */
    uint_t          idle_lwm;   /* up if idle thread < lwm */
    uint_t          idle_bhwm_cnt;  /* # of iters idle is < hwm */
    uint_t          idle_blwm_cnt;  /* # of iters idle is < lwm */
    uint_t          user_hwm;   /* up if user thread > hwm */
    int         user_lwm;   /* down if user thread <= lwm */
    int         pm_level;   /* power level for framework */
} cpudrv_pm_spd_t;

/*
 * Power management data
 */
typedef struct cpudrv_pm {
    cpudrv_pm_spd_t *head_spd;  /* ptr to head of speed */
    cpudrv_pm_spd_t *cur_spd;   /* ptr to current speed */
    uint_t      num_spd;    /* number of speeds */
    hrtime_t    lastquan_mstate[NCMSTATES]; /* last quantum's mstate */
    clock_t     lastquan_ticks; /* last quantum's clock tick */
    int     pm_busycnt; /* pm_busy_component() count  */
    taskq_t     *tq;        /* taskq handler for CPU monitor */
    timeout_id_t    timeout_id; /* cpudrv_pm_monitor()'s timeout_id */
    int     timeout_count;  /* count dispatched timeouts */
    kmutex_t    timeout_lock;   /* protect timeout_count */
    kcondvar_t  timeout_cv; /* wait on timeout_count change */
#if defined(__x86)
    kthread_t   *pm_governor_thread; /* governor thread */
    cpudrv_pm_spd_t *top_spd;   /* ptr to effective head speed */
#endif
    boolean_t   pm_started; /* PM really started */
} cpudrv_pm_t;

/*
 * Idle & user threads water marks in percentage
 */
#if defined(__x86)
#define CPUDRV_PM_IDLE_HWM      85  /* idle high water mark */
#define CPUDRV_PM_IDLE_LWM      70  /* idle low water mark */
#define CPUDRV_PM_IDLE_BLWM_CNT_MAX 1    /* # of iters idle can be < lwm */
#define CPUDRV_PM_IDLE_BHWM_CNT_MAX 1    /* # of iters idle can be < hwm */
#else
#define CPUDRV_PM_IDLE_HWM      98  /* idle high water mark */
#define CPUDRV_PM_IDLE_LWM      8   /* idle low water mark */
#define CPUDRV_PM_IDLE_BLWM_CNT_MAX 2    /* # of iters idle can be < lwm */
#define CPUDRV_PM_IDLE_BHWM_CNT_MAX 2    /* # of iters idle can be < hwm */
#endif
#define CPUDRV_PM_USER_HWM      20  /* user high water mark */
#define CPUDRV_PM_IDLE_BUF_ZONE     4    /* buffer zone when going down */


/*
 * Maximums for creating 'pm-components' property
 */
#define CPUDRV_PM_COMP_MAX_DIG  4   /* max digits in power level */
                    /* or divisor */
#define CPUDRV_PM_COMP_MAX_VAL  9999    /* max value in above digits */

/*
 * Component number for calls to PM framework
 */
#define CPUDRV_PM_COMP_NUM  0   /* first component is 0 */

/*
 * Quantum counts for normal and other clock speeds in terms of ticks.
 *
 * In determining the quantum count, we need to balance two opposing factors:
 *
 *  1) Minimal delay when user start using the CPU that is in low
 *  power mode -- requires that we monitor more frequently,
 *
 *  2) Extra code executed because of frequent monitoring -- requires
 *  that we monitor less frequently.
 *
 * We reach a tradeoff between these two requirements by monitoring
 * more frequently when we are in low speed mode (CPUDRV_PM_QUANT_CNT_OTHR)
 * so we can bring the CPU up without user noticing it. Moreover, at low
 * speed we are not using CPU much so extra code execution should be fine.
 * Since we are in no hurry to bring CPU down and at normal speed and we
 * might really be using the CPU fully, we monitor less frequently
 * (CPUDRV_PM_QUANT_CNT_NORMAL).
 */
#if defined(__x86)
#define CPUDRV_PM_QUANT_CNT_NORMAL  (hz * 1)    /* 1 sec */
#else
#define CPUDRV_PM_QUANT_CNT_NORMAL  (hz * 5)    /* 5 sec */
#endif
#define CPUDRV_PM_QUANT_CNT_OTHR    (hz * 1)    /* 1 sec */

/*
 * Taskq parameters
 */
#define CPUDRV_PM_TASKQ_THREADS     1    /* # threads to run CPU monitor */
#define CPUDRV_PM_TASKQ_MIN     2   /* min # of taskq entries */
#define CPUDRV_PM_TASKQ_MAX     2   /* max # of taskq entries */


/*
 * Device driver state structure
 */
typedef struct cpudrv_devstate {
    dev_info_t  *dip;       /* devinfo handle */
    processorid_t   cpu_id;     /* CPU number for this node */
    cpudrv_pm_t cpudrv_pm;  /* power management data */
    kmutex_t    lock;       /* protects state struct */
    void        *mach_state; /* machine specific state */
} cpudrv_devstate_t;

extern void *cpudrv_state;

/*
 * Debugging definitions
 */
#ifdef  DEBUG
#define D_INIT          0x00000001
#define D_FINI          0x00000002
#define D_ATTACH        0x00000004
#define D_DETACH        0x00000008
#define D_POWER         0x00000010
#define D_PM_INIT       0x00000020
#define D_PM_FREE       0x00000040
#define D_PM_COMP_CREATE    0x00000080
#define D_PM_MONITOR        0x00000100
#define D_PM_MONITOR_VERBOSE    0x00000200
#define D_PM_MONITOR_DELAY  0x00000400

extern uint_t   cpudrv_debug;

#define _PRINTF prom_printf
#define DPRINTF(flag, args) if (cpudrv_debug & flag) _PRINTF args;
#else
#define DPRINTF(flag, args)
#endif /* DEBUG */

extern int cpudrv_pm_change_speed(cpudrv_devstate_t *, cpudrv_pm_spd_t *);
extern boolean_t cpudrv_pm_get_cpu_id(dev_info_t *, processorid_t *);
extern boolean_t cpudrv_pm_power_ready(void);
extern boolean_t cpudrv_pm_is_governor_thread(cpudrv_pm_t *);
extern boolean_t cpudrv_mach_pm_init(cpudrv_devstate_t *);
extern void cpudrv_mach_pm_free(cpudrv_devstate_t *);

#endif /* _KERNEL */

#ifdef  __cplusplus
}
#endif

#endif /* _SYS_CPUDRV_H */