pcie_pwr.c revision 13683ea2d517595c5ea2c90934fd074d24671646
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/kmem.h>
#include <sys/async.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/sunpm.h>
#include <sys/epm.h>
#include <sys/sunndi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ddi_implfuncs.h>
#include <sys/pci.h>
#include <sys/pcie_impl.h>
#include "pcie_pwr.h"
#include "px_pci.h"
#include "px_debug.h"
/*
* This file implements the power management functionality for
* pci express switch and pci express-to-pci/pci-x bridge. All the
* code in this file is generic and is not specific to a particular chip.
* The algorithm, which decides when to go to a lower power is explained
* below:
*
* 1. Initially when no children are attached, the driver is idle from
* PM framework point of view ( PM idle/PM busy).
*
* 2. Driver is PM busy if either a reference count called pwr_hold is
* greater than zero or driver is already at the lowest possible power
* level. The lowest possible power level for the driver is equal to the
* highest power level among its children. The PM busy condition is
* indicated by PCIE_PM_BUSY bit. At any point, only one pm_busy_component
* call is made for a nexus driver instance.
*
* 3. Driver is PM idle if the pwr_hold is zero and the lowest
* possible power level is less than the driver's current power level.
* At any point, only one pm_idle_component call is made for a nexus
* driver instance.
*
* 4. For any events like child attach, it increments pwr_hold and marks
* itslef busy, if it is not already done so. This temporary hold is
* removed when the event is complete.
*
* 5. Any child's power change requires the parent (this driver) to be
* full power. So it raises its power and increments pwr_hold. It also
* marks itself temporarily busy, if it is not already done. This hold
* is removed when the child power change is complete.
*
* 6. After each child power change, it evaluates what is the lowest
* possible power level. If the lowest possible power level is less than
* the current power level and pwr_hold is zero, then it marks itself
* idle. The lowest power level is equal or greater than the highest level
* among the children. It keeps track of children's power level by
* using counters.
*
* 7. Any code e.g., which is accessing the driver's own registers should
* place a temporary hold using pcie_pm_hold.
*/
static int pcie_pwr_change(dev_info_t *dip, pcie_pwr_t *pwr_p, int new);
static void pwr_update_counters(int *countersp, int olevel, int nlevel);
static int pwr_level_allowed(pcie_pwr_t *pwr_p);
static void pcie_add_comps(dev_info_t *dip, dev_info_t *cdip,
pcie_pwr_t *pwr_p);
static void pcie_remove_comps(dev_info_t *dip, dev_info_t *cdip,
pcie_pwr_t *pwr_p);
static void pcie_pm_subrelease(dev_info_t *dip, pcie_pwr_t *pwr_p);
static boolean_t pcie_is_pcie(ddi_acc_handle_t config_handle);
#ifdef DEBUG
static char *pcie_decode_pwr_op(pm_bus_power_op_t op);
#else
#define pcie_decode_pwr_op
#endif
/*
* power entry point.
*
* This function decides whether the PM request is honorable.
* If yes, it then does what's necessary for switch or
* bridge to change its power.
*/
/* ARGSUSED */
int
pcie_power(dev_info_t *dip, int component, int level)
{
pcie_pwr_t *pwr_p = PCIE_NEXUS_PMINFO(dip);
int *counters = pwr_p->pwr_counters;
int pmcaps = pwr_p->pwr_pmcaps;
int ret = DDI_FAILURE;
ASSERT(level != PM_LEVEL_UNKNOWN);
/* PM should not asking for a level, which is unsupported */
ASSERT(level == PM_LEVEL_D0 || level == PM_LEVEL_D3 ||
(level == PM_LEVEL_D1 && (pmcaps & PCIE_SUPPORTS_D1)) ||
(level == PM_LEVEL_D2 && (pmcaps & PCIE_SUPPORTS_D2)));
mutex_enter(&pwr_p->pwr_lock);
DBG(DBG_PWR, dip, "pcie_power: change from %d to %d\n",
pwr_p->pwr_func_lvl, level);
if (pwr_p->pwr_func_lvl == level) {
DBG(DBG_PWR, dip, "pcie_power: already at %d\n", level);
ret = DDI_SUCCESS;
goto pcie_pwr_done;
}
if (level < pwr_p->pwr_func_lvl) {
/*
* Going to lower power. Reject this if we are either busy
* or there is a hold.
*/
if (pwr_p->pwr_flags & PCIE_PM_BUSY) {
DBG(DBG_PWR, dip, "pcie_power: rejecting change to %d "
"as busy\n", level);
goto pcie_pwr_done;
}
/*
* Now we know that we are neither busy nor there is a hold.
* At this point none of the children should be at full power.
* Reject the request if level reqested is lower than the level
* possible.
*/
ASSERT(!counters[PCIE_D0_INDEX] &&
!counters[PCIE_UNKNOWN_INDEX]);
if (level < pwr_level_allowed(pwr_p)) {
DBG(DBG_PWR, dip, "pcie_power: rejecting level %d as"
" %d is the lowest possible\n", level,
pwr_level_allowed(pwr_p));
goto pcie_pwr_done;
}
}
if (pcie_pwr_change(dip, pwr_p, level) != DDI_SUCCESS) {
DBG(DBG_PWR, dip, "pcie_power: attempt to change to %d "
" failed \n", level);
goto pcie_pwr_done;
}
pwr_p->pwr_func_lvl = level;
DBG(DBG_PWR, dip, "pcie_power: level changed to %d \n", level);
ret = DDI_SUCCESS;
pcie_pwr_done:
mutex_exit(&pwr_p->pwr_lock);
return (ret);
}
/*
* Called by pcie_power() only. Caller holds the pwr_lock.
*
* dip - dev_info pointer
* pwr_p - pm info for the node.
* new - new level
*/
static int
pcie_pwr_change(dev_info_t *dip, pcie_pwr_t *pwr_p, int new)
{
uint16_t pmcsr;
ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
ASSERT(new != pwr_p->pwr_func_lvl);
pmcsr = pci_config_get16(pwr_p->pwr_conf_hdl, pwr_p->pwr_pmcsr_offset);
pmcsr &= ~PCI_PMCSR_STATE_MASK;
switch (new) {
case PM_LEVEL_D0:
pmcsr |= PCI_PMCSR_D0;
break;
case PM_LEVEL_D1:
pmcsr |= PCI_PMCSR_D1;
break;
case PM_LEVEL_D2:
pmcsr |= PCI_PMCSR_D2;
break;
case PM_LEVEL_D3:
pmcsr |= PCI_PMCSR_D3HOT;
break;
default:
ASSERT(0);
break;
}
/* Save config space, if going to D3 */
if (new == PM_LEVEL_D3) {
DBG(DBG_PWR, dip, "pwr_change: saving config space regs\n");
if (pci_save_config_regs(dip) != DDI_SUCCESS) {
DBG(DBG_PWR, dip, "pcie_pwr_change: failed to save "
"config space regs\n");
return (DDI_FAILURE);
}
}
pci_config_put16(pwr_p->pwr_conf_hdl, pwr_p->pwr_pmcsr_offset, pmcsr);
/*
* TBD: Taken from pci_pci driver. Is this required?
* No bus transactions should occur without waiting for
* settle time specified in PCI PM spec rev 2.1 sec 5.6.1
* To make things simple, just use the max time specified for
* all state transitions.
*/
delay(drv_usectohz(PCI_CLK_SETTLE_TIME));
/*
* Restore config space if coming out of D3
*/
if (pwr_p->pwr_func_lvl == PM_LEVEL_D3) {
DBG(DBG_PWR, dip, "pcie_pwr_change: restoring config space\n");
if (pci_restore_config_regs(dip) != DDI_SUCCESS) {
DBG(DBG_PWR, dip, "pcie_pwr_change: failed to restore "
"config space regs\n");
return (DDI_FAILURE);
}
}
return (DDI_SUCCESS);
}
/*
* bus_ctlops.bus_power function.
*
* This function handles PRE_ POST_ change notifications, sent by
* PM framework related to child's power level change. It marks itself
* idle or busy based on the children's power level.
*/
int
pcie_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
void *arg, void *result)
{
pcie_pwr_t *pwr_p = PCIE_NEXUS_PMINFO(dip);
int *counters = pwr_p->pwr_counters; /* nexus counters */
int *child_counters; /* per child dip counters */
pm_bp_child_pwrchg_t *bpc;
pm_bp_has_changed_t *bphc;
dev_info_t *cdip;
int new_level;
int old_level;
int rv = DDI_SUCCESS;
int level_allowed, comp;
switch (op) {
case BUS_POWER_PRE_NOTIFICATION:
case BUS_POWER_POST_NOTIFICATION:
bpc = (pm_bp_child_pwrchg_t *)arg;
cdip = bpc->bpc_dip;
new_level = bpc->bpc_nlevel;
old_level = bpc->bpc_olevel;
comp = bpc->bpc_comp;
break;
case BUS_POWER_HAS_CHANGED:
bphc = (pm_bp_has_changed_t *)arg;
cdip = bphc->bphc_dip;
new_level = bphc->bphc_nlevel;
old_level = bphc->bphc_olevel;
comp = bphc->bphc_comp;
break;
default:
break;
}
ASSERT(pwr_p);
mutex_enter(&pwr_p->pwr_lock);
switch (op) {
case BUS_POWER_PRE_NOTIFICATION:
DBG(DBG_PWR, dip, "bus_power: %s@%d op %s %d->%d\n",
ddi_driver_name(cdip), ddi_get_instance(cdip),
pcie_decode_pwr_op(op), old_level, new_level);
/*
* If the nexus doesn't want the child to go into
* non-D0 state, mark the child busy. This way PM
* framework will never try to lower the child's power.
* In case of pm_lower_power, marking busy won't help.
* So we need to specifically reject the attempt to
* go to non-D0 state.
*/
if (pwr_p->pwr_flags & PCIE_NO_CHILD_PM) {
if (!PCIE_IS_COMPS_COUNTED(cdip)) {
DBG(DBG_PWR, dip, "bus_power: marking child "
"busy to disable pm \n");
(void) pm_busy_component(cdip, 0);
}
if (new_level < PM_LEVEL_D0 && !comp) {
DBG(DBG_PWR, dip, "bus_power: rejecting "
"child's attempt to go to %d\n", new_level);
rv = DDI_FAILURE;
}
}
mutex_exit(&pwr_p->pwr_lock);
if (rv == DDI_SUCCESS)
rv = pcie_pm_hold(dip);
return (rv);
case BUS_POWER_HAS_CHANGED:
case BUS_POWER_POST_NOTIFICATION:
DBG(DBG_PWR, dip, "bus_power: %s@%d op %s %d->%d\n",
ddi_driver_name(cdip), ddi_get_instance(cdip),
pcie_decode_pwr_op(op), old_level, new_level);
/*
* Child device power changed
* If pm components of this child aren't accounted for
* then add the components to the counters. This can't
* be done in POST_ATTACH ctlop as pm info isn't created
* by then. Also because a driver can make a pm call during
* the attach.
*/
if (!PCIE_IS_COMPS_COUNTED(cdip)) {
(void) pcie_pm_add_child(dip, cdip);
if ((pwr_p->pwr_flags & PCIE_NO_CHILD_PM) &&
(op == BUS_POWER_HAS_CHANGED)) {
DBG(DBG_PWR, dip, "bus_power: marking child "
"busy to disable pm \n");
(void) pm_busy_component(cdip, 0);
/*
* If the driver has already changed to lower
* power(pm_power_has_changed) on its own,
* there is nothing we can do other than
* logging the warning message on the console.
*/
if (new_level < PM_LEVEL_D0)
cmn_err(CE_WARN, "!Downstream device "
"%s@%d went to non-D0 state: "
"possible loss of link\n",
ddi_driver_name(cdip),
ddi_get_instance(cdip));
}
}
/*
* If it is POST and device PM is supported, release the
* hold done in PRE.
*/
if (op == BUS_POWER_POST_NOTIFICATION &&
PCIE_SUPPORTS_DEVICE_PM(dip)) {
pcie_pm_subrelease(dip, pwr_p);
}
if (*((int *)result) == DDI_FAILURE) {
DBG(DBG_PWR, dip, "bus_power: change for %s%d failed\n",
ddi_driver_name(cdip), ddi_get_instance(cdip));
break;
}
/* Modify counters appropriately */
pwr_update_counters(counters, old_level, new_level);
child_counters = PCIE_CHILD_COUNTERS(cdip);
pwr_update_counters(child_counters, old_level, new_level);
/* If no device PM, return */
if (!PCIE_SUPPORTS_DEVICE_PM(dip))
break;
level_allowed = pwr_level_allowed(pwr_p);
/*
* Check conditions for marking busy
* Check the flag to set this busy only once for multiple
* busy conditions. Mark busy if our current lowest possible
* is equal or greater to the current level.
*/
if (level_allowed >= pwr_p->pwr_func_lvl &&
!(pwr_p->pwr_flags & PCIE_PM_BUSY)) {
DBG(DBG_PWR, dip, "bus_power: marking busy\n");
(void) pm_busy_component(dip, 0);
pwr_p->pwr_flags |= PCIE_PM_BUSY;
break;
}
/*
* Check conditions for marking idle.
* If our lowest possible level is less than our current
* level mark idle. Mark idle only if it is not already done.
*/
if ((level_allowed < pwr_p->pwr_func_lvl) &&
(pwr_p->pwr_hold == 0) &&
(pwr_p->pwr_flags & PCIE_PM_BUSY)) {
/*
* For pci express, we should check here whether
* the link is in L1 state or not.
*/
DBG(DBG_PWR, dip, "bus_power: marking idle\n");
(void) pm_idle_component(dip, 0);
pwr_p->pwr_flags &= ~PCIE_PM_BUSY;
break;
}
break;
default:
mutex_exit(&pwr_p->pwr_lock);
return (pm_busop_bus_power(dip, impl_arg, op, arg, result));
}
mutex_exit(&pwr_p->pwr_lock);
return (rv);
}
/*
* Decrement the count of children at olevel by one and increment
* count of children at nlevel by one.
*/
static void
pwr_update_counters(int *countersp, int olevel, int nlevel)
{
uint32_t index;
ASSERT(olevel >= PM_LEVEL_UNKNOWN && olevel <= PM_LEVEL_D0);
ASSERT(nlevel >= PM_LEVEL_UNKNOWN && nlevel <= PM_LEVEL_D0);
index = (olevel == PM_LEVEL_UNKNOWN ? PCIE_UNKNOWN_INDEX : olevel);
countersp[index]--;
index = (nlevel == PM_LEVEL_UNKNOWN ? PCIE_UNKNOWN_INDEX : nlevel);
countersp[index]++;
}
/*
* Returns the lowest possible power level allowed for nexus
* based on children's power level. Lowest possible level is
* equal to the highest level among the children. It also checks
* for the supported level
* UNKNOWN = D0 > D1 > D2 > D3
*/
static int
pwr_level_allowed(pcie_pwr_t *pwr_p)
{
int *counters = pwr_p->pwr_counters;
int i, j;
ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
/*
* Search from UNKNOWN to D2. unknown is same as D0.
* find the highest level among the children. If that
* level is supported, return that level. If not,
* find the next higher supported level and return that
* level. For example, if the D1 is the highest among
* children and if D1 isn't supported return D0 as the
* lowest possible level. We don't need to look at D3
* as that is the default lowest level and it is always
* supported.
*/
for (i = PCIE_UNKNOWN_INDEX; i > 0; i--) {
if (counters[i]) {
if (i == PCIE_UNKNOWN_INDEX)
return (PM_LEVEL_D0);
/*
* i is the highest level among children. If this is
* supported, return i.
*/
if (PCIE_LEVEL_SUPPORTED(pwr_p->pwr_pmcaps, i))
return (i);
/* find the next higher supported level */
for (j = i + 1; j <= PCIE_D0_INDEX; j++) {
if (PCIE_LEVEL_SUPPORTED(pwr_p->pwr_pmcaps, j))
return (j);
}
}
}
return (PM_LEVEL_D3);
}
/*
* Update the counters with number pm components of the child
* all components are assumed to be at UNKNOWN level.
*/
static void
pcie_add_comps(dev_info_t *dip, dev_info_t *cdip, pcie_pwr_t *pwr_p)
{
int comps = PM_NUMCMPTS(cdip);
pcie_pm_t *pcie_pm_p;
pcie_pwr_child_t *cpwr_p;
ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
if (!comps)
return;
DBG(DBG_PWR, dip, "pcie_add_comps: unknown level counter incremented "
"from %d by %d because of %s@%d\n",
(pwr_p->pwr_counters)[PCIE_UNKNOWN_INDEX], comps,
ddi_driver_name(cdip), ddi_get_instance(cdip));
(pwr_p->pwr_counters)[PCIE_UNKNOWN_INDEX] += comps;
/*
* Allocate counters per child. This is a part of pcie
* pm info. If there is no pcie pm info, allocate it here.
* pcie pm info might already be there for pci express nexus
* driver e.g. px_pci. For all leaf nodes, it is allocated here.
*/
if ((pcie_pm_p = PCIE_PMINFO(cdip)) == NULL) {
pcie_pm_p = (pcie_pm_t *)kmem_zalloc(
sizeof (pcie_pm_t), KM_SLEEP);
PCIE_SET_PMINFO(cdip, pcie_pm_p);
}
cpwr_p = (pcie_pwr_child_t *)kmem_zalloc(sizeof (pcie_pwr_child_t),
KM_SLEEP);
pcie_pm_p->pcie_par_pminfo = cpwr_p;
(cpwr_p->pwr_child_counters)[PCIE_UNKNOWN_INDEX] += comps;
}
/*
* Remove the pm components of a child from our counters.
*/
static void
pcie_remove_comps(dev_info_t *dip, dev_info_t *cdip, pcie_pwr_t *pwr_p)
{
int i;
int *child_counters;
ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
if (!(PCIE_PMINFO(cdip)) || !PCIE_PAR_PMINFO(cdip)) {
if (PCIE_SUPPORTS_DEVICE_PM(dip)) {
/*
* Driver never made a PM call and we didn't create
* any counters for this device. This also means that
* hold made at the PRE_ATTACH time, still remains.
* Remove the hold now. The correct thing to do is to
* stay at full power when a child is at full power
* whether a driver is there or not. This will be
* implemented in the future.
*/
pcie_pm_subrelease(dip, pwr_p);
}
return;
}
DBG(DBG_PWR, dip, "pcie_remove_comps:counters decremented because of "
"%s@%d\n", ddi_driver_name(cdip), ddi_get_instance(cdip));
child_counters = PCIE_CHILD_COUNTERS(cdip);
/*
* Adjust the nexus counters. No need to adjust per child dip
* counters as we are freeing the per child dip info.
*/
for (i = 0; i < PCIE_MAX_PWR_LEVELS; i++) {
ASSERT((pwr_p->pwr_counters)[i] >= child_counters[i]);
(pwr_p->pwr_counters)[i] -= child_counters[i];
}
/* remove both parent pm info and pcie pminfo itself */
kmem_free(PCIE_PAR_PMINFO(cdip), sizeof (pcie_pwr_child_t));
kmem_free(PCIE_PMINFO(cdip), sizeof (pcie_pm_t));
PCIE_RESET_PMINFO(cdip);
}
/*
* Power management related initialization common to px and px_pci
*/
int
pwr_common_setup(dev_info_t *dip)
{
pcie_pm_t *pcie_pm_p;
pcie_pwr_t *pwr_p;
int pminfo_created = 0;
/* Create pminfo, if it doesn't exist already */
if ((pcie_pm_p = PCIE_PMINFO(dip)) == NULL) {
pcie_pm_p = (pcie_pm_t *)kmem_zalloc(
sizeof (pcie_pm_t), KM_SLEEP);
PCIE_SET_PMINFO(dip, pcie_pm_p);
pminfo_created = 1;
}
pwr_p = (pcie_pwr_t *)kmem_zalloc(sizeof (pcie_pwr_t), KM_SLEEP);
mutex_init(&pwr_p->pwr_lock, NULL, MUTEX_DRIVER, NULL);
/* Initialize the power level and default level support */
pwr_p->pwr_func_lvl = PM_LEVEL_UNKNOWN;
pwr_p->pwr_pmcaps = PCIE_DEFAULT_LEVEL_SUPPORTED;
if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
"pm-want-child-notification?", NULL, NULL) != DDI_PROP_SUCCESS) {
DBG(DBG_PWR, dip, "can't create pm-want-child-notification \n");
goto pwr_common_err;
}
pcie_pm_p->pcie_pwr_p = pwr_p;
return (DDI_SUCCESS);
pwr_common_err:
mutex_destroy(&pwr_p->pwr_lock);
kmem_free(pwr_p, sizeof (pcie_pwr_t));
if (pminfo_created) {
PCIE_RESET_PMINFO(dip);
kmem_free(pcie_pm_p, sizeof (pcie_pm_t));
}
return (DDI_FAILURE);
}
/*
* Undo whatever is done in pwr_common_setup. Called by px_detach or pxb_detach
*/
void
pwr_common_teardown(dev_info_t *dip)
{
pcie_pm_t *pcie_pm_p = PCIE_PMINFO(dip);
pcie_pwr_t *pwr_p;
if (!pcie_pm_p || !(pwr_p = PCIE_NEXUS_PMINFO(dip)))
return;
(void) ddi_prop_remove(DDI_DEV_T_NONE, dip,
"pm-want-child-notification?");
mutex_destroy(&pwr_p->pwr_lock);
pcie_pm_p->pcie_pwr_p = NULL;
kmem_free(pwr_p, sizeof (pcie_pwr_t));
/*
* If the parent didn't store have any pm info about
* this node, that means parent doesn't need pminfo when it handles
* POST_DETACH for this node. For example, if dip is the dip of
* root complex, then there is no parent pm info.
*/
if (!PCIE_PAR_PMINFO(dip)) {
kmem_free(pcie_pm_p, sizeof (pcie_pm_t));
PCIE_RESET_PMINFO(dip);
}
}
/*
* Raises the power and marks itself busy.
*/
int
pcie_pm_hold(dev_info_t *dip)
{
pcie_pwr_t *pwr_p;
/* If no PM info or no device PM, return */
if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)) ||
!(PCIE_SUPPORTS_DEVICE_PM(dip)))
return (DDI_SUCCESS);
/*
* If we are not at full power, then powerup.
* Need to be at full power so that link can be
* at L0. Similarly for PCI/PCI-X bus, it should be
* at full power.
*/
mutex_enter(&pwr_p->pwr_lock);
ASSERT(pwr_p->pwr_hold >= 0);
DBG(DBG_PWR, dip, "pm_hold: incrementing hold \n");
pwr_p->pwr_hold++;
/* Mark itself busy, if it is not done already */
if (!(pwr_p->pwr_flags & PCIE_PM_BUSY)) {
DBG(DBG_PWR, dip, "pm_hold: marking busy\n");
pwr_p->pwr_flags |= PCIE_PM_BUSY;
(void) pm_busy_component(dip, 0);
}
if (pwr_p->pwr_func_lvl == PM_LEVEL_D0) {
mutex_exit(&pwr_p->pwr_lock);
return (DDI_SUCCESS);
}
mutex_exit(&pwr_p->pwr_lock);
if (pm_raise_power(dip, 0, PM_LEVEL_D0) != DDI_SUCCESS) {
DBG(DBG_PWR, dip, "pm_hold: attempt to raise power "
"from %d to %d failed\n", pwr_p->pwr_func_lvl,
PM_LEVEL_D0);
pcie_pm_release(dip);
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/*
* Reverse the things done in pcie_pm_hold
*/
void
pcie_pm_release(dev_info_t *dip)
{
pcie_pwr_t *pwr_p;
/* If no PM info or no device PM, return */
if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)) ||
!(PCIE_SUPPORTS_DEVICE_PM(dip)))
return;
mutex_enter(&pwr_p->pwr_lock);
pcie_pm_subrelease(dip, pwr_p);
mutex_exit(&pwr_p->pwr_lock);
}
static void
pcie_pm_subrelease(dev_info_t *dip, pcie_pwr_t *pwr_p)
{
int level;
ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
ASSERT(pwr_p->pwr_hold > 0);
DBG(DBG_PWR, dip, "pm_subrelease: decrementing hold \n");
pwr_p->pwr_hold--;
ASSERT(pwr_p->pwr_hold >= 0);
ASSERT(pwr_p->pwr_flags & PCIE_PM_BUSY);
level = pwr_level_allowed(pwr_p);
if (pwr_p->pwr_hold == 0 && level < pwr_p->pwr_func_lvl) {
DBG(DBG_PWR, dip, "pm_subrelease: marking idle \n");
(void) pm_idle_component(dip, 0);
pwr_p->pwr_flags &= ~PCIE_PM_BUSY;
}
}
/*
* Called when the child makes the first power management call.
* sets up the counters. All the components of the child device are
* assumed to be at unknown level. It also releases the power hold
* pwr_p - parent's pwr_t
* cdip - child's dip
*/
int
pcie_pm_add_child(dev_info_t *dip, dev_info_t *cdip)
{
pcie_pwr_t *pwr_p;
/* If no PM info, return */
if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)))
return (DDI_SUCCESS);
ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
ASSERT(pwr_p->pwr_func_lvl == PM_LEVEL_D0);
pcie_add_comps(dip, cdip, pwr_p);
/* If no device power management then return */
if (!PCIE_SUPPORTS_DEVICE_PM(dip))
return (DDI_SUCCESS);
/*
* We have informed PM that we are busy at PRE_ATTACH time for
* this child. Release the hold and but don't clear the busy bit.
* If a device never changes power, hold will not be released
* and we stay at full power.
*/
ASSERT(pwr_p->pwr_hold > 0);
DBG(DBG_PWR, dip, "pm_add_child: decrementing hold \n");
pwr_p->pwr_hold--;
/*
* We must have made sure that busy bit
* is set when we put the hold
*/
ASSERT(pwr_p->pwr_flags & PCIE_PM_BUSY);
return (DDI_SUCCESS);
}
/*
* Adjust the counters when a child detaches
* Marks itself idle if the idle conditions are met.
* Called at POST_DETACH time
*/
int
pcie_pm_remove_child(dev_info_t *dip, dev_info_t *cdip)
{
int *counters;
int total;
pcie_pwr_t *pwr_p;
/* If no PM info, return */
if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)))
return (DDI_SUCCESS);
counters = pwr_p->pwr_counters;
mutex_enter(&pwr_p->pwr_lock);
pcie_remove_comps(dip, cdip, pwr_p);
/* If no device power management then return */
if (!PCIE_SUPPORTS_DEVICE_PM(dip)) {
mutex_exit(&pwr_p->pwr_lock);
return (DDI_SUCCESS);
}
total = (counters[PCIE_D0_INDEX] + counters[PCIE_UNKNOWN_INDEX] +
counters[PCIE_D1_INDEX] + counters[PCIE_D2_INDEX] +
counters[PCIE_D3_INDEX]);
/*
* Mark idle if either there are no children or our lowest
* possible level is less than the current level. Mark idle
* only if it is not already done.
*/
if ((pwr_p->pwr_hold == 0) &&
(!total || (pwr_level_allowed(pwr_p) < pwr_p->pwr_func_lvl))) {
if (pwr_p->pwr_flags & PCIE_PM_BUSY) {
DBG(DBG_PWR, dip, "bus_power: marking idle\n");
(void) pm_idle_component(dip, 0);
pwr_p->pwr_flags &= ~PCIE_PM_BUSY;
}
}
mutex_exit(&pwr_p->pwr_lock);
return (DDI_SUCCESS);
}
boolean_t
pcie_is_pcie(ddi_acc_handle_t config_handle)
{
uint8_t cap_ptr, cap_id;
cap_ptr = pci_config_get8(config_handle, PCI_BCNF_CAP_PTR);
while (cap_ptr != PCI_CAP_NEXT_PTR_NULL) {
cap_id = pci_config_get8(config_handle, cap_ptr + PCI_CAP_ID);
if (cap_id == PCI_CAP_ID_PCI_E)
return (B_TRUE);
cap_ptr = pci_config_get8(config_handle,
cap_ptr + PCI_CAP_NEXT_PTR);
}
return (B_FALSE);
}
/*
* Called by px_attach or pxb_attach:: DDI_RESUME
*/
int
pcie_pwr_resume(dev_info_t *dip)
{
dev_info_t *cdip;
pcie_pwr_t *pwr_p = NULL;
ddi_acc_handle_t config_handle;
if (PCIE_PMINFO(dip))
pwr_p = PCIE_NEXUS_PMINFO(dip);
if (pwr_p) {
/* Inform the PM framework that dip is at full power */
if (PCIE_SUPPORTS_DEVICE_PM(dip)) {
ASSERT(pwr_p->pwr_func_lvl == PM_LEVEL_D0);
(void) pm_raise_power(dip, 0,
pwr_p->pwr_func_lvl);
}
}
/*
* Code taken from pci driver.
* Restore config registers for children that did not save
* their own registers. Children pwr states are UNKNOWN after
* a resume since it is possible for the PM framework to call
* resume without an actual power cycle. (ie if suspend fails).
*/
for (cdip = ddi_get_child(dip); cdip != NULL;
cdip = ddi_get_next_sibling(cdip)) {
boolean_t is_pcie;
/*
* Not interested in children who are not already
* init'ed. They will be set up by init_child().
*/
if (i_ddi_node_state(cdip) < DS_INITIALIZED) {
DBG(DBG_PWR, dip,
"DDI_RESUME: skipping %s%d not in CF1\n",
ddi_driver_name(cdip), ddi_get_instance(cdip));
continue;
}
/*
* Only restore config registers if saved by nexus.
*/
if (ddi_prop_exists(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
"nexus-saved-config-regs") != 1)
continue;
DBG(DBG_PWR, dip,
"DDI_RESUME: nexus restoring %s%d config regs\n",
ddi_driver_name(cdip), ddi_get_instance(cdip));
if (pci_config_setup(cdip, &config_handle) != DDI_SUCCESS) {
DBG(DBG_PWR, dip, "DDI_RESUME: "
"pci_config_setup for %s%d failed\n",
ddi_driver_name(cdip), ddi_get_instance(cdip));
continue;
}
/* clear errors left by OBP scrubbing */
pcie_clear_errors(cdip, config_handle);
/* PCIe workaround: disable errors during 4K config resore */
if (is_pcie = pcie_is_pcie(config_handle))
pcie_disable_errors(cdip, config_handle);
(void) pci_restore_config_regs(cdip);
if (is_pcie) {
pcie_enable_errors(cdip, config_handle);
(void) pcie_enable_ce(cdip, config_handle);
}
pci_config_teardown(&config_handle);
if (ndi_prop_remove(DDI_DEV_T_NONE, cdip,
"nexus-saved-config-regs") != DDI_PROP_SUCCESS) {
DBG(DBG_PWR, dip, "%s%d can't remove prop %s",
ddi_driver_name(cdip), ddi_get_instance(cdip),
"nexus-saved-config-regs");
}
}
return (DDI_SUCCESS);
}
/*
* Called by pcie_detach or pcieb_detach:: DDI_SUSPEND
*/
int
pcie_pwr_suspend(dev_info_t *dip)
{
dev_info_t *cdip;
ddi_acc_handle_t config_handle;
int i, *counters; /* per nexus counters */
int *child_counters = NULL; /* per child dip counters */
pcie_pwr_t *pwr_p = NULL;
if (PCIE_PMINFO(dip))
pwr_p = PCIE_NEXUS_PMINFO(dip);
/*
* Mark all children to be unknown and bring our power level
* to full, if required. This is to avoid any panics while
* accessing the child's config space.
*/
if (pwr_p) {
mutex_enter(&pwr_p->pwr_lock);
if (PCIE_SUPPORTS_DEVICE_PM(dip) &&
pwr_p->pwr_func_lvl != PM_LEVEL_D0) {
mutex_exit(&pwr_p->pwr_lock);
if (pm_raise_power(dip, 0, PM_LEVEL_D0) !=
DDI_SUCCESS) {
DBG(DBG_PWR, dip, "pwr_suspend: attempt "
"to raise power from %d to %d "
"failed\n", pwr_p->pwr_func_lvl,
PM_LEVEL_D0);
return (DDI_FAILURE);
}
mutex_enter(&pwr_p->pwr_lock);
}
counters = pwr_p->pwr_counters;
/*
* Update the nexus counters. At the resume time all
* components are considered to be at unknown level. Use the
* fact that counters for unknown level are at the end.
*/
for (i = 0; i < PCIE_UNKNOWN_INDEX; i++) {
counters[PCIE_UNKNOWN_INDEX] += counters[i];
counters[i] = 0;
}
mutex_exit(&pwr_p->pwr_lock);
}
/*
* Code taken from pci driver.
* Save the state of the configuration headers of child
* nodes.
*/
for (cdip = ddi_get_child(dip); cdip != NULL;
cdip = ddi_get_next_sibling(cdip)) {
boolean_t is_pcie;
/*
* Not interested in children who are not already
* init'ed. They will be set up in init_child().
*/
if (i_ddi_node_state(cdip) < DS_INITIALIZED) {
DBG(DBG_PWR, dip, "DDI_SUSPEND: skipping "
"%s%d not in CF1\n", ddi_driver_name(cdip),
ddi_get_instance(cdip));
continue;
}
/*
* Update per child dip counters, if any. Counters
* will not exist if the child is not power manageable
* or if its power entry is never invoked.
*/
if (PCIE_PMINFO(cdip) && PCIE_PAR_PMINFO(cdip))
child_counters = PCIE_CHILD_COUNTERS(cdip);
if (child_counters && pwr_p) {
mutex_enter(&pwr_p->pwr_lock);
for (i = 0; i < PCIE_UNKNOWN_INDEX; i++) {
child_counters[PCIE_UNKNOWN_INDEX] +=
child_counters[i];
child_counters[i] = 0;
}
mutex_exit(&pwr_p->pwr_lock);
}
/*
* Only save config registers if not already saved by child.
*/
if (ddi_prop_exists(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
SAVED_CONFIG_REGS) == 1) {
continue;
}
/*
* The nexus needs to save config registers. Create a property
* so it knows to restore on resume.
*/
if (ndi_prop_create_boolean(DDI_DEV_T_NONE, cdip,
"nexus-saved-config-regs") != DDI_PROP_SUCCESS) {
DBG(DBG_PWR, dip, "%s%d can't update prop %s",
ddi_driver_name(cdip), ddi_get_instance(cdip),
"nexus-saved-config-regs");
}
DBG(DBG_PWR, dip, "DDI_SUSPEND: saving config space for"
" %s%d\n", ddi_driver_name(cdip), ddi_get_instance(cdip));
/* PCIe workaround: disable errors during 4K config save */
if (pci_config_setup(cdip, &config_handle) != DDI_SUCCESS) {
DBG(DBG_PWR, dip, "DDI_SUSPEND: pci_config_setup "
"for %s%d failed\n",
ddi_driver_name(cdip), ddi_get_instance(cdip));
continue;
}
if (is_pcie = pcie_is_pcie(config_handle))
pcie_disable_errors(cdip, config_handle);
(void) pci_save_config_regs(cdip);
if (is_pcie) {
pcie_enable_errors(cdip, config_handle);
(void) pcie_enable_ce(cdip, config_handle);
}
pci_config_teardown(&config_handle);
}
return (DDI_SUCCESS);
}
#ifdef DEBUG
/*
* Description of bus_power_op.
*/
typedef struct pcie_buspwr_desc {
pm_bus_power_op_t pwr_op;
char *pwr_desc;
} pcie_buspwr_desc_t;
static pcie_buspwr_desc_t pcie_buspwr_desc[] = {
{BUS_POWER_CHILD_PWRCHG, "CHILD_PWRCHG"},
{BUS_POWER_NEXUS_PWRUP, "NEXUS_PWRUP"},
{BUS_POWER_PRE_NOTIFICATION, "PRE_NOTIFICATION"},
{BUS_POWER_POST_NOTIFICATION, "POST_NOTIFICATION"},
{BUS_POWER_HAS_CHANGED, "HAS_CHANGED"},
{BUS_POWER_NOINVOL, "NOINVOL"},
{-1, NULL}
};
/*
* Returns description of the bus_power_op.
*/
static char *
pcie_decode_pwr_op(pm_bus_power_op_t op)
{
pcie_buspwr_desc_t *descp = pcie_buspwr_desc;
for (; descp->pwr_desc; descp++) {
if (op == descp->pwr_op)
return (descp->pwr_desc);
}
return ("UNKNOWN OP");
}
#endif