/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/sysmacros.h>
#include <sys/ddi_impldefs.h>
#include <sys/ddi_implfuncs.h>
#include <sys/pcie_impl.h>
#include <sys/pcie_pwr.h>
/*
* This file implements the power management functionality for
* pci express switch and pci express-to-pci/pci-x bridge. All the
* code in this file is generic and is not specific to a particular chip.
* The algorithm, which decides when to go to a lower power is explained
* below:
*
* 1. Initially, when no children are attached, the driver is idle from
* the PM framework's point of view.
*
* 2. Driver is PM busy if either a reference count called pwr_hold is
* greater than zero or driver is already at the lowest possible power
* level. The lowest possible power level for the driver is equal to the
* highest power level among its children. The PM busy condition is
* indicated by PCIE_PM_BUSY bit. At any point, only one pm_busy_component
* call is made for a nexus driver instance.
*
* 3. Driver is PM idle if the pwr_hold is zero and the lowest
* possible power level is less than the driver's current power level.
* At any point, only one pm_idle_component call is made for a nexus
* driver instance.
*
* 4. For any events like child attach, it increments pwr_hold and marks
* itself busy, if it has not already done so. This temporary hold is
* removed when the event is complete.
*
* 5. Any child's power change requires the parent (this driver) to be
* full power. So it raises its power and increments pwr_hold. It also
* marks itself temporarily busy, if it is not already done. This hold
* is removed when the child power change is complete.
*
* 6. After each child power change, it evaluates what is the lowest
* possible power level. If the lowest possible power level is less than
* the current power level and pwr_hold is zero, then it marks itself
* idle. The lowest power level is equal to or greater than the highest
* level among the children. It keeps track of children's power level by
* using counters.
*
* 7. Any code that accesses the driver's own registers, for example,
* should place a temporary hold using pcie_pm_hold.
*/
pcie_pwr_t *pwr_p);
pcie_pwr_t *pwr_p);
#ifdef DEBUG
#else
/* Non-DEBUG builds: pcie_decode_pwr_op is defined as an empty macro. */
#define pcie_decode_pwr_op
#endif
/*
* power entry point.
*
* This function decides whether the PM request is honorable.
* If yes, it then does what's necessary for switch or
* bridge to change its power.
*/
/* ARGSUSED */
int
{
if (dip)
return (DDI_SUCCESS);
#endif /* defined(__i386) || defined(__amd64) */
/* PM should not be asking for a level that is unsupported */
PCIE_DBG("%s(%d): pcie_power: change from %d to %d\n",
level);
/* Requested level is already the current one: report success */
PCIE_DBG("%s(%d): pcie_power: already at %d\n",
ret = DDI_SUCCESS;
goto pcie_pwr_done;
}
/*
* Going to lower power. Reject this if we are either busy
* or there is a hold.
*/
PCIE_DBG("%s(%d): pcie_power: rejecting change to %d "
goto pcie_pwr_done;
}
/*
* Now we know that we are neither busy nor is there a hold.
* At this point none of the children should be at full power.
* Reject the request if the level requested is lower than the
* lowest level possible.
*/
PCIE_DBG("%s(%d): pcie_power: rejecting level %d as"
" %d is the lowest possible\n",
goto pcie_pwr_done;
}
}
PCIE_DBG("%s(%d): pcie_power: attempt to change to %d "
level);
goto pcie_pwr_done;
}
PCIE_DBG("%s(%d): pcie_power: level changed to %d \n",
ret = DDI_SUCCESS;
return (ret);
}
/*
* Called by pcie_power() only. Caller holds the pwr_lock.
*
* dip - dev_info pointer
* pwr_p - pm info for the node.
* new - new level
*/
static int
{
/*
* Translate the requested PM level into the matching PMCSR
* power-state bits, which are OR-ed into pmcsr.
*/
switch (new) {
case PM_LEVEL_D0:
pmcsr |= PCI_PMCSR_D0;
break;
case PM_LEVEL_D1:
pmcsr |= PCI_PMCSR_D1;
break;
case PM_LEVEL_D2:
pmcsr |= PCI_PMCSR_D2;
break;
case PM_LEVEL_D3:
/* PM_LEVEL_D3 is programmed as D3hot */
pmcsr |= PCI_PMCSR_D3HOT;
break;
default:
/* No other level is expected here */
ASSERT(0);
break;
}
/* Save config space, if going to D3 */
if (new == PM_LEVEL_D3) {
PCIE_DBG("%s(%d): pwr_change: saving config space regs\n",
PCIE_DBG("%s(%d): pcie_pwr_change: failed to save "
return (DDI_FAILURE);
}
}
/*
* TBD: Taken from pci_pci driver. Is this required?
* No bus transactions should occur without waiting for
* settle time specified in PCI PM spec rev 2.1 sec 5.6.1
* To make things simple, just use the max time specified for
* all state transitions.
*/
/*
* Restore config space if coming out of D3
*/
PCIE_DBG("%s(%d): pcie_pwr_change: restoring config space\n",
PCIE_DBG("%s(%d): pcie_pwr_change: failed to restore "
return (DDI_FAILURE);
}
}
return (DDI_SUCCESS);
}
/*
* bus_ctlops.bus_power function.
*
* This function handles PRE_ POST_ change notifications, sent by
* PM framework related to child's power level change. It marks itself
* idle or busy based on the children's power level.
*/
int
{
int new_level;
int old_level;
if (dip)
return (DDI_SUCCESS);
#endif /* defined(__i386) || defined(__amd64) */
switch (op) {
break;
case BUS_POWER_HAS_CHANGED:
break;
default:
break;
}
switch (op) {
PCIE_DBG("%s(%d): pcie_bus_power: %s@%d op %s %d->%d\n",
/*
* If the nexus doesn't want the child to go into
* non-D0 state, mark the child busy. This way PM
* framework will never try to lower the child's power.
* In case of pm_lower_power, marking busy won't help.
* So we need to specifically reject the attempt to
* go to non-D0 state.
*/
if (!PCIE_IS_COMPS_COUNTED(cdip)) {
PCIE_DBG("%s(%d): pcie_bus_power: marking "
"child busy to disable pm \n",
(void) pm_busy_component(cdip, 0);
}
PCIE_DBG("%s(%d): pcie_bus_power: rejecting "
"child's attempt to go to %d\n",
rv = DDI_FAILURE;
}
}
if (rv == DDI_SUCCESS)
return (rv);
case BUS_POWER_HAS_CHANGED:
PCIE_DBG("%s(%d): pcie_bus_power: %s@%d op %s %d->%d\n",
/*
* Child device power changed
* If pm components of this child aren't accounted for
* then add the components to the counters. This can't
* be done in POST_ATTACH ctlop as pm info isn't created
* by then. Also because a driver can make a pm call during
* the attach.
*/
if (!PCIE_IS_COMPS_COUNTED(cdip)) {
(op == BUS_POWER_HAS_CHANGED)) {
PCIE_DBG("%s(%d): pcie_bus_power: marking "
"child busy to disable pm \n",
(void) pm_busy_component(cdip, 0);
/*
* If the driver has already changed to lower
* power (pm_power_has_changed) on its own,
* there is nothing we can do other than
* logging the warning message on the console.
*/
if (new_level < PM_LEVEL_D0)
"%s@%d went to non-D0 state: "
"possible loss of link\n",
}
}
/*
* If it is POST and device PM is supported, release the
* hold done in PRE.
*/
if (op == BUS_POWER_POST_NOTIFICATION &&
}
/*
* result holds DDI_FAILURE when the child's power change
* did not succeed; this path leaves the switch via break.
*/
if (*((int *)result) == DDI_FAILURE) {
PCIE_DBG("%s(%d): pcie_bus_power: change for %s%d "
break;
}
/* Modify counters appropriately */
/* If no device PM, return */
if (!PCIE_SUPPORTS_DEVICE_PM(dip))
break;
/*
* Check conditions for marking busy.
* Check the flag to set this busy only once for multiple
* busy conditions. Mark busy if our current lowest possible
* level is equal to or greater than the current level.
*/
PCIE_DBG("%s(%d): pcie_bus_power: marking busy\n",
(void) pm_busy_component(dip, 0);
break;
}
/*
* Check conditions for marking idle.
* If our lowest possible level is less than our current
* level mark idle. Mark idle only if it is not already done.
*/
/*
* For pci express, we should check here whether
* the link is in L1 state or not.
*/
PCIE_DBG("%s(%d): pcie_bus_power: marking idle\n",
(void) pm_idle_component(dip, 0);
break;
}
break;
default:
}
return (rv);
}
/*
* Decrement the count of children at olevel by one and increment
* count of children at nlevel by one.
*/
static void
{
}
/*
* Returns the lowest possible power level allowed for nexus
* based on children's power level. Lowest possible level is
* equal to the highest level among the children. It also checks
* for the supported level
* UNKNOWN = D0 > D1 > D2 > D3
*/
static int
{
int i, j;
/*
* Search from UNKNOWN to D2. unknown is same as D0.
* find the highest level among the children. If that
* level is supported, return that level. If not,
* find the next higher supported level and return that
* level. For example, if the D1 is the highest among
* children and if D1 isn't supported return D0 as the
* lowest possible level. We don't need to look at D3
* as that is the default lowest level and it is always
* supported.
*/
/* Scan downward from the UNKNOWN slot; index 0 is never examined */
for (i = PCIE_UNKNOWN_INDEX; i > 0; i--) {
if (counters[i]) {
/* A non-zero UNKNOWN count is treated as D0 */
if (i == PCIE_UNKNOWN_INDEX)
return (PM_LEVEL_D0);
/*
* i is the highest level among children. If this is
* supported, return i.
*/
return (i);
/* find the next higher supported level */
for (j = i + 1; j <= PCIE_D0_INDEX; j++) {
return (j);
}
}
}
/* No non-zero counter was found above index 0: fall back to D3 */
return (PM_LEVEL_D3);
}
/*
* Update the counters with the number of pm components of the child;
* all components are assumed to be at UNKNOWN level.
*/
static void
{
/* Nothing to add when comps is zero */
if (!comps)
return;
PCIE_DBG("%s(%d): pcie_add_comps: unknown level counter incremented "
"from %d by %d because of %s@%d\n",
/*
* Allocate counters per child. This is a part of pcie
* pm info. If there is no pcie pm info, allocate it here.
* pcie pm info might already be there for a pci express nexus
* driver, e.g. pcieb. For all leaf nodes, it is allocated here.
*/
}
KM_SLEEP);
}
/*
* Remove the pm components of a child from our counters.
*/
static void
{
int i;
int *child_counters;
if (PCIE_SUPPORTS_DEVICE_PM(dip)) {
/*
* Driver never made a PM call and we didn't create
* any counters for this device. This also means that
* the hold made at PRE_ATTACH time still remains.
* Remove the hold now. The correct thing to do is to
* stay at full power when a child is at full power
* whether a driver is there or not. This will be
* implemented in the future.
*/
}
return;
}
PCIE_DBG("%s(%d): pcie_remove_comps:counters decremented because of "
/*
* Adjust the nexus counters. No need to adjust per child dip
* counters as we are freeing the per child dip info.
*/
/* One pass over every power-level slot */
for (i = 0; i < PCIE_MAX_PWR_LEVELS; i++) {
}
/* remove both parent pm info and pcie pminfo itself */
}
/*
* Power management related initialization common to px and pcieb
*/
int
{
/*
* Set to 1 when pminfo is created below and tested again just
* before the DDI_FAILURE return.
* NOTE(review): presumably so the error path only undoes what this
* call created -- the body of that test is not visible; confirm.
*/
int pminfo_created = 0;
/* Create pminfo, if it doesn't exist already */
pminfo_created = 1;
}
/* Initialize the power level and default level support */
goto pwr_common_err;
return (DDI_SUCCESS);
if (pminfo_created) {
}
return (DDI_FAILURE);
}
/*
* Undo whatever is done in pwr_common_setup. Called by px_detach or pxb_detach
*/
void
{
return;
/*
* If the parent didn't store any pm info about this node,
* that means the parent doesn't need pminfo when it handles
* POST_DETACH for this node. For example, if dip is the dip of
* the root complex, then there is no parent pm info.
*/
if (!PCIE_PAR_PMINFO(dip)) {
}
}
/*
* Raises the power and marks itself busy.
*/
int
{
/* If no PM info or no device PM, return */
return (DDI_SUCCESS);
/*
* If we are not at full power, then powerup.
* Need to be at full power so that link can be
* at full power.
*/
PCIE_DBG("%s(%d): pm_hold: incrementing hold \n",
/* Mark itself busy, if it is not done already */
PCIE_DBG("%s(%d): pm_hold: marking busy\n",
(void) pm_busy_component(dip, 0);
}
return (DDI_SUCCESS);
}
/* Failure path for the attempt to raise power */
PCIE_DBG("%s(%d): pm_hold: attempt to raise power "
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/*
* Reverse the things done in pcie_pm_hold
*/
void
{
/* If no PM info or no device PM, return */
return;
}
/*
* Hold-release helper. The visible trace messages show it decrementing
* the hold and, on the idle path, calling pm_idle_component() for
* component 0 of dip.
*/
static void
{
int level;
PCIE_DBG("%s(%d): pm_subrelease: decrementing hold \n",
PCIE_DBG("%s(%d): pm_subrelease: marking idle \n",
(void) pm_idle_component(dip, 0);
}
}
/*
* Called when the child makes the first power management call.
* sets up the counters. All the components of the child device are
* assumed to be at unknown level. It also releases the power hold
* pwr_p - parent's pwr_t
* cdip - child's dip
*/
int
{
/* If no PM info, return */
return (DDI_SUCCESS);
/* If no device power management then return */
if (!PCIE_SUPPORTS_DEVICE_PM(dip))
return (DDI_SUCCESS);
/*
* We have informed PM that we are busy at PRE_ATTACH time for
* this child. Release the hold but don't clear the busy bit.
* If a device never changes power, the hold will not be released
* and we stay at full power.
*/
PCIE_DBG("%s(%d): pm_add_child: decrementing hold \n",
/*
* We must have made sure that busy bit
* is set when we put the hold
*/
return (DDI_SUCCESS);
}
/*
* Adjust the counters when a child detaches
* Marks itself idle if the idle conditions are met.
* Called at POST_DETACH time
*/
int
{
int *counters;
int total;
/* If no PM info, return */
return (DDI_SUCCESS);
/* If no device power management then return */
if (!PCIE_SUPPORTS_DEVICE_PM(dip)) {
return (DDI_SUCCESS);
}
/*
* Mark idle if either there are no children or our lowest
* possible level is less than the current level. Mark idle
* only if it is not already done.
*/
/*
* NOTE(review): the trace text below is prefixed "pcie_bus_power"
* although this is the child-detach path; it looks copied from
* pcie_bus_power and can mislead when reading debug output.
*/
PCIE_DBG("%s(%d): pcie_bus_power: marking idle\n",
(void) pm_idle_component(dip, 0);
}
}
return (DDI_SUCCESS);
}
{
return (bus_p->bus_pcie_off != 0);
}
/*
* Called by px_attach or pcieb_attach:: DDI_RESUME
*/
int
{
if (dip)
return (DDI_SUCCESS);
#endif /* defined(__i386) || defined(__amd64) */
if (PCIE_PMINFO(dip))
if (pwr_p) {
/* Inform the PM framework that dip is at full power */
if (PCIE_SUPPORTS_DEVICE_PM(dip)) {
(void) pm_raise_power(dip, 0,
}
}
/*
* Code taken from pci driver.
* Restore config registers for children that did not save
* their own registers. Children pwr states are UNKNOWN after
* a resume since it is possible for the PM framework to call
* resume without an actual power cycle (i.e. if suspend fails).
*/
/*
* Not interested in children who are not already
* init'ed. They will be set up by init_child().
*/
PCIE_DBG("%s(%d): "
"DDI_RESUME: skipping %s%d not in CF1\n",
continue;
}
/*
* Only restore config registers if saved by nexus.
*/
"nexus-saved-config-regs") != 1)
continue;
PCIE_DBG("%s(%d): "
"DDI_RESUME: nexus restoring %s%d config regs\n",
/* clear errors left by OBP scrubbing */
/* PCIe workaround: disable errors during 4K config restore */
(void) pci_restore_config_regs(cdip);
if (is_pcie) {
(void) pcie_enable_ce(cdip);
}
"nexus-saved-config-regs") != DDI_PROP_SUCCESS) {
PCIE_DBG("%s(%d): %s%d can't remove prop %s",
"nexus-saved-config-regs");
}
}
return (DDI_SUCCESS);
}
/*
* Called by pcie_detach or pcieb_detach:: DDI_SUSPEND
*/
int
{
if (dip)
return (DDI_SUCCESS);
#endif /* defined(__i386) || defined(__amd64) */
if (PCIE_PMINFO(dip))
/*
* Mark all children to be unknown and bring our power level
* to full, if required. This is to avoid any panics while
* accessing the child's config space.
*/
if (pwr_p) {
if (PCIE_SUPPORTS_DEVICE_PM(dip) &&
DDI_SUCCESS) {
PCIE_DBG("%s(%d): pwr_suspend: attempt "
"to raise power from %d to %d "
return (DDI_FAILURE);
}
}
/*
* Update the nexus counters. At resume time all
* components are considered to be at unknown level. Use the
* fact that counters for unknown level are at the end.
*/
/* Clear every slot below the UNKNOWN index */
for (i = 0; i < PCIE_UNKNOWN_INDEX; i++) {
counters[i] = 0;
}
}
/*
* Code taken from pci driver.
* Save the state of the configuration headers of child
* nodes.
*/
/*
* Not interested in children who are not already
* init'ed. They will be set up in init_child().
*/
PCIE_DBG("%s(%d): DDI_SUSPEND: skipping "
continue;
}
/*
* Update per child dip counters, if any. Counters
* will not exist if the child is not power manageable
* or if its power entry is never invoked.
*/
if (child_counters && pwr_p) {
/* Same sweep for the child: slots below UNKNOWN end at 0 */
for (i = 0; i < PCIE_UNKNOWN_INDEX; i++) {
child_counters[i];
child_counters[i] = 0;
}
}
/*
* Only save config registers if not already saved by child.
*/
SAVED_CONFIG_REGS) == 1) {
continue;
}
/*
* The nexus needs to save config registers. Create a property
* so it knows to restore on resume.
*/
"nexus-saved-config-regs") != DDI_PROP_SUCCESS) {
PCIE_DBG("%s(%d): %s%d can't update prop %s",
"nexus-saved-config-regs");
}
PCIE_DBG("%s(%d): DDI_SUSPEND: saving config space for"
/* PCIe workaround: disable errors during 4K config save */
(void) pci_save_config_regs(cdip);
if (is_pcie) {
(void) pcie_enable_ce(cdip);
}
}
return (DDI_SUCCESS);
}
#ifdef DEBUG
/*
* Description of bus_power_op.
*/
typedef struct pcie_buspwr_desc {
char *pwr_desc;	/* printable name of the bus power op */
{BUS_POWER_CHILD_PWRCHG, "CHILD_PWRCHG"},
{BUS_POWER_NEXUS_PWRUP, "NEXUS_PWRUP"},
{BUS_POWER_PRE_NOTIFICATION, "PRE_NOTIFICATION"},
{BUS_POWER_POST_NOTIFICATION, "POST_NOTIFICATION"},
{BUS_POWER_HAS_CHANGED, "HAS_CHANGED"},
{BUS_POWER_NOINVOL, "NOINVOL"},
/* NOTE(review): presumably the end-of-table marker -- confirm */
{-1, NULL}
};
/*
* Returns description of the bus_power_op.
*/
static char *
{
}
/* Fallback text returned when nothing above has returned a description */
return ("UNKNOWN OP");
}
#endif