mcamd_drv.c revision 7991dd244dd6e9bd35355640fc39c8fe3300c4fb
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/x86_archext.h>
#include <sys/cpu_module.h>
#include <qsort.h>
#include <sys/pci_cfgspace.h>
#include <mcamd.h>
#include <mcamd_dimmcfg.h>
#include <mcamd_pcicfg.h>
#include <mcamd_api.h>
/*
* Set nonzero (the default is 0) to prevent mc-amd from attaching.
* NOTE(review): the check of this tunable is not visible in this
* section - confirm where the attach path tests it.
*/
int mc_no_attach = 0;
/*
* Of the 754/939/940 packages, only socket 940 supports quadrank registered
* dimms. Unfortunately, no memory-controller register indicates the
* presence of quadrank dimm support or presence (i.e., in terms of number
* of slots per cpu, and chip-select lines per slot). The following may be set
* nonzero to indicate that quadrank registered dimms are supported.
*
* There is no need to set this for F(1207) and S1g1.
*/
int mc_quadranksupport = 0;
/*
* NOTE(review): no use of mc_hold_attached is visible in this section;
* presumably a nonzero value keeps the driver attached once attach has
* succeeded - confirm against the attach path.
*/
int mc_hold_attached = 1;
/*
 * Yields the larger of m and n; m is chosen when the two compare equal.
 * Either argument may be evaluated twice, so avoid side effects.
 */
#define	MAX(m, n)	((n) <= (m) ? (m) : (n))
/*
 * Yields the smaller of m and n; m is chosen when the two compare equal.
 * Either argument may be evaluated twice, so avoid side effects.
 */
#define	MIN(m, n)	((n) >= (m) ? (m) : (n))
/*
* The following tuneable is used to determine the DRAM scrubbing rate.
* The values range from 0x00-0x16 as described in the BKDG. Zero
* disables DRAM scrubbing. Values above zero indicate rates in descending
* order.
*
* The default value below is used on several Sun systems. In the future
* this code should assign values dynamically based on memory sizing.
*/
enum {
MC_SCRUB_BIOSDEFAULT, /* retain system default value */
MC_SCRUB_FIXED, /* assign mc_scrub_rate_* values */
MC_SCRUB_MAX /* assign max of system and tunables */
static void
{
return;
mc->mc_snapshotsz = 0;
mc->mc_snapshotgen++;
}
static int
{
return (0);
NV_ENCODE_XDR, KM_SLEEP) != 0)
return (-1);
return (0);
}
static mc_t *
{
return (mc);
}
return (NULL);
}
/*
* Read config register pairs into the two arrays provided on the given
* handle and at offsets as follows:
*
* Index Array r1 offset Array r2 offset
* 0 r1addr r2addr
* 1 r1addr + incr r2addr + incr
* 2 r1addr + 2 * incr r2addr + 2 * incr
* ...
* n - 1 r1addr + (n - 1) * incr r2addr + (n - 1) * incr
*
* The number of registers to read into the r1 array is r1n; the number
* for the r2 array is r2n.
*/
static void
{
int i;
if (i < r1n)
if (i < r2n)
}
}
/*ARGSUSED*/
static int
{
return (CMI_HDL_WALK_DONE);
} else {
return (CMI_HDL_WALK_NEXT);
}
}
static void
{
const char *s;
s = "Unknown"; /* no cpu for this chipid found */
else
s = cmi_hdl_getsocketstr(hdl);
}
static uint32_t
{
union mcreg_nbcfg nbcfg;
}
static uint32_t
{
union mcreg_nbcfg nbcfg;
}
static void
{
}
static void
{
int valfound;
}
static void
{
int nelem, i;
char csname[MCDCFG_CSNAMELEN];
/*
* It is possible for an mc_cs_t not to have associated
* DIMM info if mcdcfg_lookup failed.
*/
sizeof (csname));
}
sizeof (csname));
}
}
/* Add cslist nvlist array even if zero members */
for (i = 0; i < nelem; i++)
nvlist_free(cslist[i]);
}
static void
{
int nelem, i;
char *csnamep[4];
int ncs = 0;
for (i = 0; i < MC_CHIP_DIMMRANKMAX; i++) {
ncs++;
}
}
}
/* Add dimmlist nvlist array even if zero members */
for (i = 0; i < nelem; i++)
nvlist_free(dimmlist[i]);
}
static void
{
int i;
if (ndcnt <= MC_CHIP_MAXNODES) {
}
}
}
static nvlist_t *
{
/*
* Since this nvlist is used in populating the topo tree changes
* made here may propagate through to changed property names etc
* in the topo tree. Some properties in the topo tree will be
* contracted via ARC, so be careful what you change here.
*/
return (mcnvl);
}
/*
* Link a dimm to its associated chip-selects and chip-select lines.
* Total the size of all ranks of this dimm.
*/
static void
{
int i;
/* Skip to first unused rank slot */
for (i = 0; i < MC_CHIP_DIMMRANKMAX; i++) {
break;
} else {
}
}
ASSERT(i != MC_CHIP_DIMMRANKMAX);
}
/*
* Create a dimm structure and call to link it to its associated chip-selects.
*/
static mc_dimm_t *
{
return (mcd);
}
/*
* The chip-select structure includes an array of dimms associated with
* that chip-select. This function fills that array, and also builds
* the list of all dimms on this memory controller mc_dimmlist. The
* caller has filled a structure with all there is to know about the
* associated dimm(s).
*/
static void
{
int nfound = 0;
int i;
/*
* Has some other chip-select already created this dimm or dimms?
* If so then link to the dimm(s) from the mccs_dimm array,
* record their topo numbers in the csp_dimmnums array, and link
* the dimm(s) to the additional chip-select.
*/
}
}
if (nfound == 0) {
else
} else {
}
}
/* The rank number is constant across all constituent dimm(s) */
}
/*
* mc_dimmlist_create is called after we have discovered all enabled
* (and spare or testfailed on revs F and G) chip-selects on the
* given memory controller. For each chip-select we must derive
* the associated dimm(s), remembering that a chip-select base/mask
* pair may be associated with up to 2 chip-select lines (in 128 bit mode)
* and that any one dimm may be associated with 1, 2, or 4 chip-selects
* depending on whether it is single, dual or quadrank.
*/
static void
{
union mcreg_dramcfg_hi *drcfghip =
/*
* Are we dealing with quadrank registered dimms?
*
* For socket 940 we can't tell and we'll assume we're not unless
* mc_quadranksupport is set nonzero. A possible optimisation in systems
* that export an SMBIOS table would be to count the number of
* dimm slots per cpu - more than 4 would indicate no quadrank support
* and 4 or fewer would indicate that if we see any of the upper
* chip-selects enabled then a quadrank dimm is present.
*
* For socket F(1207) we can check a bit in the dram config high reg.
*
* Other socket types do not support registered dimms.
*/
r4 = mc_quadranksupport != 0;
/*
* Are we dealing with quadrank SO-DIMMs? These are supported
* in AM2 and S1g1 packages only, but in all rev F/G cases we
* can detect their presence via a bit in the dram config high reg.
*/
/*
* If lookup fails we will not create dimm structures for
* this chip-select. In the mc_cs_t we will have both
* csp_dimmnum members set to MC_INVALNUM and patounum
* code will see from those that we do not have dimm info
* for this chip-select.
*/
continue;
}
}
static mc_cs_t *
{
int i;
for (i = 0; i < MC_CHIP_DIMMPERCS; i++)
if (spare)
return (mccs);
}
/*
* For any cs# of this mc marked TestFail generate an ereport with
* resource identifying the associated dimm(s).
*/
static void
{
int i;
unum.unum_board = 0;
for (i = 0; i < MC_CHIP_DIMMPERCS; i++)
&unum,
}
}
}
/*
* Function 0 - HyperTransport Technology Configuration
*/
static void
{
union mcreg_nodeid nodeid;
int i;
for (i = 0, offset = MC_HT_REG_RTBL_NODE_0;
i < HT_COHERENTNODES(&nodeid);
i++, offset += MC_HT_REG_RTBL_INCR)
}
/*
* Function 1 Configuration - Address Map (see BKDG 3.4.4 DRAM Address Map)
*
* Read the Function 1 Address Map for each potential DRAM node. The Base
* Address for a node gives the starting system address mapped at that node,
* and the limit gives the last valid address mapped at that node. Regions for
* different nodes should not overlap, unless node-interleaving is enabled.
* The base register also indicates the node-interleaving settings (IntlvEn).
* The limit register includes IntlvSel which determines which 4K blocks will
* be routed to this node and the destination node ID for addresses that fall
* within the [base, limit] range - this must match the pair number.
*/
static void
{
union mcreg_drambase basereg;
union mcreg_dramlimit limreg;
union mcreg_dramhole hole;
/*
* Derive some "cooked" properties for nodes that have a range of
* physical addresses that are read or write enabled and for which
* the DstNode matches the node we are attaching.
*/
}
/*
* The Function 1 DRAM Hole Address Register tells us which node(s)
* own the DRAM space that is hoisted above 4GB, together with the
* hole base and offset for this node. This was introduced in
* revision E.
*/
}
}
/*
* Read some function 3 parameters via PCI Mechanism 1 accesses (which
* will serialize any NB accesses).
*/
static void
{
union mcreg_nbcfg nbcfg;
union mcreg_sparectl sparectl;
}
}
}
static int
{
return (0);
return (-1);
else
return (1);
}
/*
* The following are for use in simulating TestFail for a chip-select
* without poking at the hardware (which tends to get upset if you do
* since the BIOS needs to restart to map a failed cs out). For internal
* testing only! Note that setting these does not give the full experience -
* the selected chip-select *is* enabled and can give errors etc and the
* patounum logic will get confused.
*
* Both default to -1. NOTE(review): presumably -1 matches no memory
* controller or chip-select number, leaving the hook inactive - confirm
* against the "Testing hook" check in the Function 2 configuration code.
*/
int testfail_mcnum = -1;
int testfail_csnum = -1;
/*
* Function 2 configuration - DRAM Controller
*/
static void
{
union mcreg_dramcfg_lo drcfg_lo;
union mcreg_dramcfg_hi drcfg_hi;
union mcreg_drammisc drmisc;
union mcreg_bankaddrmap baddrmap;
int maskdivisor;
int wide = 0;
int i;
/*
* Read Function 2 DRAM Configuration High and Low registers. The High
* part is mostly concerned with memory clocks etc and we'll not have
* any use for that. The Low component tells us if ECC is enabled,
* if we're in 64- or 128-bit MC mode, how the upper chip-selects
* are mapped, which chip-select pairs are using x4 parts, etc.
*/
/*
* Note the DRAM controller width. The 64/128 bit is in a different
* bit position for revision F and G.
*/
} else {
}
/*
* Read Function 2 DRAM Controller Miscellaneous Register for those
* revs that support it. This includes the Mod64Mux indication on
* these revs - for rev E it is in DRAM config low.
*/
}
/*
* Read Function 2 DRAM Bank Address Mapping. This encodes the
* type of DIMM module in use for each chip-select pair.
* Prior to revision F it also tells us whether BankSwizzle mode
* is enabled - in rev F that has moved to dram config hi register.
*/
/*
* Determine whether bank swizzle mode is active. Bank swizzling was
* introduced as an option in rev E, but the bit that indicates it
* is enabled has moved in revs F/G.
*/
mcp->mcp_bnkswzl =
}
/*
* Read the DRAM CS Base and DRAM CS Mask registers. Revisions prior
* to F have an equal number of base and mask registers; revision F
* has twice as many base registers as masks.
*/
/*
* Create a cs node for each enabled chip-select as well as
* any appointed online spare chip-selects and for any that have
* failed test.
*/
for (i = 0; i < MC_CHIP_NCS; i++) {
} else {
spare = 0;
testfail = 0;
}
/* Testing hook */
testfail = 1;
}
/*
* If the chip-select is not enabled then skip it unless
* it is a designated online spare or is marked with TestFail.
*/
continue;
/*
* For an enabled or spare chip-select the Bank Address Mapping
* register will be valid as will the chip-select mask. The
* base will not be valid but we'll read and store it anyway.
* We will not know whether the spare is already swapped in
* until MC function 3 attaches.
*/
continue;
} else {
sz = 0;
}
else
/*
* Check for cs bank interleaving - some bits are clear in the
* chip-select mask if cs interleaving is active.
*/
ibits++;
}
}
}
/*
* If there is no chip-select interleave on this node determine
* whether the chip-select ranks are contiguous or if there
* is a hole.
*/
int ncsbe = 0;
}
if (ncsbe != 0) {
(int (*)(const void *, const void *))csbasecmp);
for (i = 1; i < ncsbe; i++) {
}
}
}
/*
* Since we do not attach to MC function 3 go ahead and read some
* config parameters from it now.
*/
/*
* Now that we have discovered all enabled/spare/testfail chip-selects
* we divine the associated DIMM configuration.
*/
}
typedef struct mc_bind_map {
const char *bm_bindnm; /* attachment binding name */
const char *bm_model; /* value for device node model property */
/*
* Do not attach to MC function 3 - agpgart already attaches to that.
* Function 3 may be a good candidate for a nexus driver to fan it out
* into virtual devices by functionality. We will use pci_mech1_getl
* to retrieve the function 3 parameters we require.
*/
static const mc_bind_map_t mc_bind_map[] = {
"AMD Memory Controller (HT Configuration)", mc_mkprops_htcfg },
"AMD Memory Controller (Address Map)", mc_mkprops_addrmap },
"AMD Memory Controller (DRAM Controller & HT Trace)",
};
/*ARGSUSED*/
static int
{
return (EINVAL);
return (EINVAL);
}
return (0);
}
/*ARGSUSED*/
static int
{
return (0);
}
/*
* Enable swap from chip-select csnum to the spare chip-select on this
* memory controller (if any).
*/
static int
{
union mcreg_sparectl sparectl;
union mcreg_scrubctl scrubctl;
int i = 0;
return (ENOTSUP); /* MC rev does not offer online spare */
return (ENODEV); /* Supported, but no spare configured */
return (EBUSY); /* Spare already swapped in */
return (EINVAL); /* Can't spare the spare! */
break;
}
return (EINVAL); /* nominated bad CS does not exist */
/*
* If the DRAM Scrubber is not enabled then the swap cannot succeed.
*/
return (ENODEV); /* DRAM scrubber not enabled */
/*
* Read Online Spare Control Register again, just in case our
* state does not reflect reality.
*/
return (EBUSY);
/* Write to the BadDramCs field */
MCREG_VAL32(&sparectl));
/* And request that the swap to the spare start */
MCREG_VAL32(&sparectl));
/*
* Poll for SwapDone - we have disabled notification by interrupt.
* Swap takes "several CPU cycles, depending on the DRAM speed, but
* is performed in the background" (Family 0Fh Bios Porting Guide).
* We're in a slow ioctl path so there is no harm in waiting around
* a bit - consumers of the ioctl must be aware that it may take
* a moment. We will poll for up to mc_swapdonetime seconds,
* limiting that to 120s.
*
* The swap is performed by the DRAM scrubber (which must be enabled)
* whose scrub rate is accelerated for the duration of the swap.
* The maximum swap rate is 40.0ns per 64 bytes, so the maximum
* supported cs size of 16GB would take 10.7s at that max rate
*/
do {
if (i++ < 20)
else
return (ETIME); /* Operation timed out */
return (0);
}
/*ARGSUSED*/
static int
{
int rc = 0;
return (EINVAL);
return (EINVAL);
}
switch (cmd) {
case MC_IOC_SNAPSHOT_INFO: {
if (mc_snapshot_update(mc) < 0) {
return (EIO);
}
mode) < 0)
break;
}
case MC_IOC_SNAPSHOT:
if (mc_snapshot_update(mc) < 0) {
return (EIO);
}
mode) < 0)
break;
case MC_IOC_ONLINESPARE_EN:
return (EPERM);
}
if (!rw_tryupgrade(&mc_lock)) {
return (EAGAIN);
}
}
break;
}
return (rc);
}
nodev, /* not a block driver */
nodev, /* no print routine */
nodev, /* no dump routine */
nodev, /* no read routine */
nodev, /* no write routine */
nodev, /* no devmap routine */
nodev, /* no mmap routine */
nodev, /* no segmap routine */
nochpoll, /* no chpoll routine */
0, /* not a STREAMS driver */
};
/*ARGSUSED*/
static int
{
int rc = DDI_SUCCESS;
if (infocmd != DDI_INFO_DEVT2DEVINFO &&
return (DDI_FAILURE);
}
rc = DDI_FAILURE;
} else if (infocmd == DDI_INFO_DEVT2DEVINFO) {
} else {
}
return (rc);
}
/*ARGSUSED2*/
static int
{
return (fmerr->fme_status);
}
static void
{
}
static void
{
int m;
int rc = 0;
if (rc == 0) {
} else {
#ifdef DEBUG
#endif /* DEBUG */
return;
}
#ifdef DEBUG
"!mc reads smbios base boards info failed");
#endif /* DEBUG */
}
}
/*ARGSUSED*/
static int
{
return (CMI_HDL_WALK_DONE);
} else {
return (CMI_HDL_WALK_NEXT);
}
}
static mc_t *
{
/*
* Find a handle for one of a chip's CPUs.
*
* We can use one of the chip's CPUs since all cores
* of a chip share the same revision and socket type.
*/
return (NULL); /* no cpu for this chipid found! */
return (mc);
}
/*
* Return the maximum scrubbing rate between r1 and r2, where r2 is extracted
* from the specified 'cfg' register value using 'mask' and 'shift'. If a
* value is zero, scrubbing is off so return the opposite value. Otherwise
* the maximum rate is the smallest non-zero value of the two values.
*/
static uint32_t
{
}
/*
* Enable the memory scrubber. We must use the mc_pcicfg_{get32,put32}_nohdl
* interfaces since we do not bind to function 3.
*/
{
union mcreg_scrubctl scrubctl;
union mcreg_dramscrublo dalo;
union mcreg_dramscrubhi dahi;
if (mc_scrub_policy == MC_SCRUB_BIOSDEFAULT)
/*
* Disable DRAM scrubbing while we fiddle.
*/
MCREG_VAL32(&scrubctl));
/*
* Setup DRAM Scrub Address Low and High registers for the
* base address of this node, and to select scrubber redirect.
*/
MCREG_VAL32(&dalo));
MCREG_VAL32(&dahi));
"resetting to 0x%x\n", AMD_NB_SCRUBCTL_RATE_MAX);
}
switch (mc_scrub_policy) {
case MC_SCRUB_FIXED:
/* Use the system value checked above */
break;
default:
"using default policy of MC_SCRUB_MAX", mc_scrub_policy);
/*FALLTHRU*/
case MC_SCRUB_MAX:
break;
}
/*
* OPTERON_ERRATUM_99:
* This erratum applies on revisions D and earlier.
* This erratum also applies on revisions E and later,
* if BIOS uses chip-select hoisting instead of DRAM hole
* mapping.
*
* Do not enable the dram scrubber if the chip-select ranges
* for the node are not contiguous.
*/
if (mc_scrub_rate_dram != AMD_NB_SCRUBCTL_RATE_NONE &&
mc->mc_csdiscontig) {
"%s chip %d because DRAM hole is present on this node",
}
/*
* OPTERON_ERRATUM_101:
* This erratum applies on revisions D and earlier.
*
* If the DRAM Base Address register's IntlvEn field indicates that
* node interleaving is enabled, we must disable the DRAM scrubber
* and return zero to indicate that Solaris should use s/w instead.
*/
if (mc_scrub_rate_dram != AMD_NB_SCRUBCTL_RATE_NONE &&
"%s chip %d because DRAM memory is node-interleaved",
}
if (mc_scrub_rate_dram != AMD_NB_SCRUBCTL_RATE_NONE) {
MCREG_VAL32(&scrubctl));
}
return (mc_scrub_rate_dram != AMD_NB_SCRUBCTL_RATE_NONE ?
}
/*ARGSUSED*/
static int
{
}
return (CMI_HDL_WALK_NEXT);
}
/*
* NOTE(review): presumably records that the software memory scrubber has
* already been disabled (the attach path comments describe disabling it
* once a hardware scrubber is enabled); the code that reads and sets this
* flag is not visible in this section - confirm.
*/
static int mc_sw_scrub_disabled = 0;
static int
{
const mc_bind_map_t *bm;
const char *bindnm;
enum mc_funcnum func;
long unitaddr;
/*
* This driver has no hardware state, but does
* claim to have a reg property, so it will be
* called on suspend. It is probably better to
* make sure it doesn't get called on suspend,
* but it is just as easy to make sure we just
* return DDI_SUCCESS if called.
*/
if (cmd == DDI_RESUME)
return (DDI_SUCCESS);
return (DDI_FAILURE);
break;
}
}
return (DDI_FAILURE);
/*
* We need the device number, which corresponds to the processor node
* number plus 24. The node number can then be used to associate this
* memory controller device with a given processor chip.
*/
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
break;
}
/* Integrate this memory controller device into existing set */
/*
* We don't complain here because this is a legitimate
* path for MP systems. On those machines, we'll attach
* before all CPUs have been initialized, and thus the
* chip verification in mc_create will fail. We'll be
* reattached later for those CPUs.
*/
return (DDI_FAILURE);
}
} else {
}
/* Beyond this point, we're committed to creating this node */
/*
* Add the common properties to this node, and then add any properties
* that are specific to this node based upon its configuration space.
*/
(void) ddi_prop_update_string(DDI_DEV_T_NONE,
(void) ddi_prop_update_int(DDI_DEV_T_NONE,
}
/*
* If this is the last node to be attached for this memory controller,
* then create the minor node, enable scrubbers, and register with
* cpu module(s) for this chip.
*/
if (func == MC_FUNC_DEVIMAP) {
int dram_present = 0;
0) != DDI_SUCCESS) {
"%d memory controller\n",
}
/*
* Register the memory controller for every CPU of this chip.
*
* If there is memory present on this node and ECC is enabled
* attempt to enable h/w memory scrubbers for this node.
* If we are successful in enabling *any* hardware scrubbers,
* disable the software memory scrubber.
*/
NULL);
/*
* This node may map non-dram memory alone, so we
* must check for an enabled chip-select to be
* sure there is dram present.
*/
dram_present = 1;
break;
}
}
}
/*
* On a single chip system there is no point in
* scrubbing if there is no ECC on the single node.
* On a multichip system, necessarily Opteron using
* registered ECC-capable DIMMs, if there is memory
* present on a node but no ECC there then we'll assume
* ECC is disabled for all nodes and we will not enable
* the scrubber and will also disable the software
* memscrub thread.
*/
rc = 1;
} else if (!dram_present) {
/* No memory on this node - others decide memscrub */
rc = 0;
} else {
/*
* There is memory on this node and ECC is enabled.
* Call via the cpu module to enable memory scrubbing
* on this node - we could call directly but then
* we may overlap with a request to enable chip-cache
* scrubbing.
*/
}
}
/*
*/
return (DDI_SUCCESS);
}
/*ARGSUSED*/
static int
{
/*
* See the comment about suspend in
* mc_attach().
*/
if (cmd == DDI_SUSPEND)
return (DDI_SUCCESS);
else
return (DDI_FAILURE);
}
DEVO_REV, /* devo_rev */
0, /* devo_refcnt */
mc_getinfo, /* devo_getinfo */
nulldev, /* devo_identify */
nulldev, /* devo_probe */
mc_attach, /* devo_attach */
mc_detach, /* devo_detach */
nodev, /* devo_reset */
&mc_cb_ops, /* devo_cb_ops */
NULL, /* devo_bus_ops */
NULL, /* devo_power */
ddi_quiesce_not_needed, /* devo_quiesce */
};
"Memory Controller for AMD processors",
};
/*
* Module linkage handed to mod_install() by _init(); it refers to the
* modldrv driver linkage structure.
*/
static struct modlinkage modlinkage = {
(void *)&modldrv,
};
/*
 * Module load entry point.
 *
 * Installs the module via mod_install() and returns its result, but only
 * when PCI config space access is available; otherwise the load is
 * refused with ENOTSUP.
 */
int
_init(void)
{
	/* With PCI config space support present, go ahead and install. */
	if (pci_getl_func != NULL)
		return (mod_install(&modlinkage));

	/* Refuse to load if there is no PCI config space support. */
	return (ENOTSUP);
}
int
{
}
/*
* Module unload entry point.
*
* NOTE(review): as written, rc is returned without ever being assigned
* and the trailing return (0) is unreachable. A call that sets rc
* (conventionally mod_remove(&modlinkage), mirroring the mod_install()
* call in _init()) appears to be missing - confirm and restore it.
*/
int
_fini(void)
{
int rc;
return (rc);
return (0);
}