/* cma_page.c revision a307a2550e1618a7971751ee83c22441ced27bbe */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Page retirement can be an extended process because a retirement
* may not be possible when the original request is made. The kernel will
* repeatedly attempt to retire a given page, but will not let us know when the
* page has been retired. We therefore have to poll to see if the retirement
* has been completed. This poll is implemented with a bounded exponential
* backoff to reduce the burden which we impose upon the system.
*
* To reduce the burden on fmd in the face of retirement storms, we schedule
* all retries as a group. In the simplest case, we attempt to retire a single
* page. When forced to retry, we initially schedule a retry at a configurable
* interval t. If the retry fails, we schedule another at 2 * t, and so on,
* until t reaches the maximum interval (also configurable). Future retries
* for that page will occur with t equal to the maximum interval value. We
* will never give up on a retirement.
*
* With multiple retirements, the situation gets slightly more complicated. As
* indicated above, we schedule retries as a group. We don't want to deny new
* pages their short retry intervals, so we'll (re)set the retry interval to the
* value appropriate for the newest page.
*/
#include <cma.h>
#include <time.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <strings.h>
/*
* NOTE(review): this copy of the function is incomplete -- its name,
* parameter list and most of its statements are missing, so it cannot
* compile as shown. Restore the full text from source control before
* modifying it.
*
* What the surviving lines show: every visible failure path returns -1
* (the existing comments state that errno has already been set by the
* failing call), a negative rc also yields -1, and the final return is 0.
* The NV_ENCODE_NATIVE fragment presumably belongs to an nvlist size/pack
* call that fills fmribuf -- TODO confirm against the complete source.
*/
static int
{
char *fmribuf;
return (-1); /* errno is set for us */
return (-1); /* errno is set for us */
}
NV_ENCODE_NATIVE, 0)) != 0) {
return (-1); /* errno is set for us */
}
/* NOTE(review): the statement that assigns rc is missing from this copy */
if (rc < 0) {
return (-1);
}
return (0);
}
/*
* NOTE(review): only the return type and an empty brace pair survive in
* this copy; the function name, parameters and body are missing. Restore
* the full text from source control -- nothing about its behaviour can be
* determined from what is shown here.
*/
static void
{
}
/*
* Retire the specified ASRU, referring to a memory page by PA or by DIMM
* offset (i.e. the encoded coordinates internal bank, row, and column).
* In the initial FMA implementation, fault.memory.page exported an ASRU
* with an explicit physical address, which is valid at the initial time of
* diagnosis but may not be later following DR, DIMM removal, or interleave
* changes. On SPARC, this issue was solved by exporting the DIMM offset
* and pushing the entire FMRI to the platform memory controller through
* /dev/mem so it can derive the current PA from the DIMM and offset.
* On x64, we also use DIMM and offset, but the mem:/// unum string is an
* encoded hc:/// FMRI that is then used by the x64 memory controller driver.
* At some point these three approaches need to be rationalized: all platforms
* should use the same scheme, either with decoding in the kernel or decoding
* in userland (i.e. with a libtopo method to compute and update the PA).
*/
/*
* NOTE(review): this copy of the function is incomplete -- its name,
* parameter list and many statements are missing, and the braces no
* longer balance. Restore the full text from source control before
* modifying it. The inline notes below describe only what the surviving
* lines show.
*/
/*ARGSUSED*/
void
{
char *unumstr;
/* It should already be expanded, but we'll do it again anyway */
return;
}
return;
}
/*
* Nothing is done when cma.cma_page_doretire is false; presumably this is
* the switch that enables page retirement -- TODO confirm.
*/
if (!cma.cma_page_doretire) {
return;
}
return;
}
/*
* If the unum is an hc fmri string expand it to an fmri and include
* that in a modified asru nvlist.
*/
int err;
topo_strerror(err));
return;
}
return;
}
unumfmri) != 0) {
"unumfmri to modified asru");
return;
}
}
/*
* NOTE(review): the statement each `if (asrucp)` originally guarded is
* missing from this copy; as shown, the `return` has become the guarded
* statement, which is almost certainly not the intended logic. Presumably
* the lost statement released the modified asru copy -- TODO confirm.
*/
if (asrucp)
return;
if (asrucp)
return;
}
/*
* The page didn't immediately retire. We'll need to periodically
* check to see if it has been retired.
*/
/* NOTE(review): both branches below have lost their bodies in this copy */
if (asrucp) {
} else {
}
/* NOTE(review): the statement guarded by this test is missing */
if (cma.cma_page_timerid != 0)
}
/*
* NOTE(review): this copy of the function is incomplete -- its name,
* parameter list and the conditions guarding each return are missing.
* Restore the full text from source control before modifying it.
*
* Return convention, as stated by the surviving comments:
*   1 - nothing more to do: there is no longer a page to retire, the
*       page has been retired, or we are giving up;
*   0 - the caller should schedule another retry.
*/
static int
{
return (1); /* no longer a page to retire */
}
return (1); /* page retired */
}
return (0); /* schedule another retry */
} else {
"retirement: page isn't scheduled for retirement\n",
} else {
}
return (1); /* give up */
}
}
/*
* NOTE(review): this copy of the function is incomplete -- its name,
* parameter list, the loop over pending pages and the timer scheduling
* code are missing. Restore the full text from source control before
* modifying it.
*
* What the surviving lines show: cma.cma_page_timerid is cleared on
* entry; a page is removed from the list after a successful retry or when
* giving up; otherwise its pg_nretries count is incremented; once the
* cma_page_maxretries tunable has been reached the case is closed. If no
* retirements remain the function returns, else the delay is backed off
* and another retry is scheduled.
*/
void
{
cma_page_t **pagep;
cma.cma_page_timerid = 0;
/*
* Successful retry or we're giving up - remove from
* the list
*/
/*
* NOTE(review): the second operand of this condition is missing; a
* cma_page_maxretries of 0 presumably means "retry without limit" --
* TODO confirm.
*/
} else if (cma.cma_page_maxretries == 0 ||
page->pg_nretries++;
} else {
/*
* Tunable maxretries was set and we reached
* the max, so just close the case.
*/
"giving up page retire 0x%llx on retry %u\n",
}
}
}
return; /* no more retirements */
/*
* We still have retirements that haven't completed. Back the delay
* off, and schedule a retry.
*/
}
/*
* NOTE(review): only the return type and stray braces survive in this
* copy; the function name, parameters and body are missing. Restore the
* full text from source control -- nothing about its behaviour can be
* determined from what is shown here.
*/
void
{
}
}