/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* sun4u Memory Scrubbing
*
* On detection of a correctable memory ECC error, the sun4u kernel
* returns the corrected data to the requester and re-writes it
 * to memory (DRAM). So if the correctable error was transient,
 * the error has effectively been cleaned (scrubbed) from memory.
*
 * Scrubbing thus reduces the likelihood that multiple transient errors
* will occur in the same memory word, making uncorrectable errors due
* to transients less likely.
*
* Thus is born the desire that every memory location be periodically
* accessed.
*
* This file implements a memory scrubbing thread. This scrubber
* guarantees that all of physical memory is accessed periodically
* (memscrub_period_sec -- 12 hours).
*
* It attempts to do this as unobtrusively as possible. The thread
* schedules itself to wake up at an interval such that if it reads
 * memscrub_span_pages (32MB) on each wakeup, it will read all of physical
 * memory in memscrub_period_sec (12 hours).
*
* The scrubber uses the block load and prefetch hardware to read memory
* @ 1300MB/s, so it reads spans of 32MB in 0.025 seconds. Unlike the
* original sun4d scrubber the sun4u scrubber does not read ahead if the
 * system is idle because we can read memory very efficiently.
*
* The scrubber maintains a private copy of the phys_install memory list
* to keep track of what memory should be scrubbed.
*
* The global routines memscrub_add_span() and memscrub_delete_span() are
* used to add and delete from this list. If hotplug memory is later
* supported these two routines can be used to notify the scrubber of
* memory configuration changes.
 *
 * The following parameters can be set via /etc/system:
 *
 * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (32MB)
* memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
* memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI)
* memscrub_delay_start_sec = (5 minutes)
* memscrub_verbose = (0)
* memscrub_override_ticks = (1 tick)
* disable_memscrub = (0)
* pause_memscrub = (0)
* read_all_memscrub = (0)
*
 * The scrubber will print NOTICE messages describing what it is doing
 * when "memscrub_verbose" is set.
*
* If the scrubber's sleep time calculation drops to zero ticks,
* memscrub_override_ticks will be used as the sleep time instead. The
* sleep time should only drop to zero on a system with over 131.84
* terabytes of memory, or where the default scrubber parameters have
* been adjusted. For example, reducing memscrub_span_pages or
* memscrub_period_sec causes the sleep time to drop to zero with less
* memory. Note that since the sleep time is calculated in clock ticks,
* using hires clock ticks allows for more memory before the sleep time
* becomes zero.
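 *
 * For example, with the default 100 Hz clock the 12 hour period is
 * 12 * 60 * 60 * 100 = 4,320,000 ticks. Since the sleep time is
 * roughly (period in ticks) / (physical memory / span size), it
 * reaches zero once memory exceeds 4,320,000 spans of 32MB --
 * about 131.84 terabytes.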
*
* The scrubber will exit (or never be started) if it finds the variable
* "disable_memscrub" set.
*
* The scrubber will pause (not read memory) when "pause_memscrub"
* is set. It will check the state of pause_memscrub at each wakeup
* period. The scrubber will not make up for lost time. If you
* pause the scrubber for a prolonged period of time you can use
* the "read_all_memscrub" switch (see below) to catch up. In addition,
* pause_memscrub is used internally by the post memory DR callbacks.
* It is set for the small period of time during which the callbacks
* are executing. This ensures "memscrub_lock" will be released,
* allowing the callbacks to finish.
*
* The scrubber will read all memory if "read_all_memscrub" is set.
* The normal span read will also occur during the wakeup.
*
* MEMSCRUB_MIN_PAGES (32MB) is the minimum amount of memory a system
* must have before we'll start the scrubber.
*
 * MEMSCRUB_DFL_SPAN_PAGES (32MB) is based on the guess that 0.025 sec
 * is a "good" minimum amount of time for the thread to run per wakeup.
*
* MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
* twice the frequency the hardware folk estimated would be necessary.
*
* MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI) is based on the assumption
 * that the scrubber should get its fair share of time (since its runs
 * are short). At a priority of 0 the scrubber will be starved.
*/
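/*
 * Kernel interfaces used below (memlists, kstats, locks, timeouts and
 * memory DR callbacks). This is a minimal include set inferred from
 * the code in this file.
 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/kstat.h>
#include <sys/ksynch.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/debug.h>
#include <sys/disp.h>
#include <sys/memlist.h>
#include <sys/mem_config.h>
#include <sys/machparam.h>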
#include <vm/seg_kmem.h>
/*
* Should really have paddr_t defined, but it is broken. Use
* ms_paddr_t in the meantime to make the code cleaner
*/
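typedef uint64_t ms_paddr_t;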
/*
* Global Routines:
*/
int memscrub_init(void);
void memscrub_induced_error(void);
/*
* Global Data:
*/
/*
* scrub if we have at least this many pages
*/
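#define	MEMSCRUB_MIN_PAGES	((32 * 1024 * 1024) / PAGESIZE)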
/*
* scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
*/
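#define	MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */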
/*
* scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
*/
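#define	MEMSCRUB_DFL_SPAN_PAGES	((32 * 1024 * 1024) / PAGESIZE)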
/*
* almost anything is higher priority than scrubbing
*/
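#define	MEMSCRUB_DFL_THREAD_PRI	MINCLSYSPRI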
/*
* size used when scanning memory
*/
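/*
 * The scan unit below assumes the historical 256-byte block size; the
 * MEMSCRUB_BPP* values give blocks per page for each page size used
 * by memscrub_scan().
 */
#define	MEMSCRUB_BLOCK_SIZE		256
#define	MEMSCRUB_BLOCK_SIZE_SHIFT	8	/* log2(MEMSCRUB_BLOCK_SIZE) */
#define	MEMSCRUB_BLOCKS_PER_PAGE	(MMU_PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)

#define	MEMSCRUB_BPP4M		(MMU_PAGESIZE4M >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP512K	(MMU_PAGESIZE512K >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP64K		(MMU_PAGESIZE64K >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP		(MMU_PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)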
/*
* This message indicates that we have exceeded the limitations of
* the memscrubber. See the comments above regarding what would
* cause the sleep time to become zero. In DEBUG mode, this message
* is logged on the console and in the messages file. In non-DEBUG
* mode, it is only logged in the messages file.
*/
#ifdef DEBUG
#define	MEMSCRUB_OVERRIDE_MSG	"Memory scrubber sleep time is zero " \
	"seconds, consuming entire CPU."
#else
#define	MEMSCRUB_OVERRIDE_MSG	"!Memory scrubber sleep time is zero " \
	"seconds, consuming entire CPU."
#endif /* DEBUG */
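/*
 * Tunables listed in the overview comment above, with their documented
 * defaults. The exact types here are a best guess from how each is used.
 */
pgcnt_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
time_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
time_t memscrub_delay_start_sec = 5 * 60;
uint_t memscrub_verbose = 0;
uint_t memscrub_override_ticks = 1;
int disable_memscrub = 0;
uint_t pause_memscrub = 0;
uint_t read_all_memscrub = 0;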
/*
* Static Routines
*/
static void memscrubber(void);
static void memscrub_cleanup(void);
/*
* Static Data
*/
/*
* memscrub_lock protects memscrub_memlist, interval_ticks, cprinfo, ...
*/
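static struct memlist *memscrub_memlist;
static uint_t memscrub_phys_pages;

static kmutex_t memscrub_lock;
static kcondvar_t memscrub_cv;
static timeout_id_t memscrub_tid;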
static void memscrub_init_mem_config(void);
static void memscrub_uninit_mem_config(void);
/*
 * Linked list of memscrub-aware spans that have retired pages.
* Currently enabled only on sun4u USIII-based platforms.
*/
typedef struct memscrub_page_retire_span {
	ms_paddr_t				address;
	struct memscrub_page_retire_span	*next;
} memscrub_page_retire_span_t;

static memscrub_page_retire_span_t *memscrub_page_retire_span_list = NULL;
static void memscrub_page_retire_span_add(ms_paddr_t);
static void memscrub_page_retire_span_delete(ms_paddr_t);
static int memscrub_page_retire_span_search(ms_paddr_t);
static void memscrub_page_retire_span_list_update(void);
/*
 * add_to_page_retire_list: Set by the cpu_async_log_err() routine,
 * via memscrub_induced_error(), when a CE/UE occurs on a retired
 * page due to memscrub reading. Cleared by memscrub after updating
 * the global page retire span list. Piggybacking on protection of
 * memscrub_lock, which is held during set and clear.
 * Note: When cpu_async_log_err() calls memscrub_induced_error(), it is
 * running in softint context, which is fired on the cpu the memscrub
 * thread is currently running on. The memscrub thread has affinity set
 * during memscrub_read(), hence migration to a new cpu is not expected.
 */
static int add_to_page_retire_list = 0;
/*
* Keep track of some interesting statistics
*/
static struct memscrub_kstats {
	kstat_named_t	done_early;	/* ahead of schedule */
	kstat_named_t	early_sec;	/* by cumulative num secs */
	kstat_named_t	done_late;	/* behind schedule */
	kstat_named_t	late_sec;	/* by cumulative num secs */
	kstat_named_t	interval_ticks;	/* num ticks between intervals */
	kstat_named_t	force_run;	/* forced to run, non-timeout */
	kstat_named_t	errors_found;	/* num errors found by memscrub */
} memscrub_counts = {
{ "done_early", KSTAT_DATA_UINT32 },
{ "early_sec", KSTAT_DATA_UINT32 },
{ "done_late", KSTAT_DATA_UINT32 },
{ "late_sec", KSTAT_DATA_UINT32 },
{ "interval_ticks", KSTAT_DATA_UINT32 },
{ "force_run", KSTAT_DATA_UINT32 },
{ "errors_found", KSTAT_DATA_UINT32 },
};
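static kstat_t *memscrub_ksp;	/* set up in memscrub_init() */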
/*
 * create memscrub_memlist from the phys_install list,
 * initialize locks, and set memscrub_phys_pages.
 */
int
memscrub_init(void)
{
/*
* only startup the scrubber if we have a minimum
* number of pages
*/
if (physinstalled >= MEMSCRUB_MIN_PAGES) {
/*
* initialize locks
*/
/*
* copy phys_install to memscrub_memlist
*/
if (memscrub_add_span(
return (-1);
}
}
/*
* initialize kstats
*/
"misc", KSTAT_TYPE_NAMED,
sizeof (memscrub_counts) / sizeof (kstat_named_t),
if (memscrub_ksp) {
} else {
}
/*
* create memscrubber thread
*/
		/*
		 * We don't want call backs changing the list
		 * if there is no thread running. We do not
		 * attempt to deal with stopping/starting scrubbing
		 * on memory size changes.
		 */
}
return (0);
}
static void
memscrub_cleanup(void)
{
	while (memscrub_memlist) {
		(void) memscrub_delete_span(
		    (pfn_t)(memscrub_memlist->ml_address >> PAGESHIFT),
		    (pgcnt_t)(memscrub_memlist->ml_size >> PAGESHIFT));
	}

	if (memscrub_ksp)
		kstat_delete(memscrub_ksp);
}
#ifdef MEMSCRUB_DEBUG
static void
memscrub_printmemlist(char *title, struct memlist *listp)
{
	struct memlist *list;

	cmn_err(CE_CONT, "%s:\n", title);

	for (list = listp; list; list = list->ml_next) {
		cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
		    list->ml_address, list->ml_size);
	}
}
#endif /* MEMSCRUB_DEBUG */
/* ARGSUSED */
static void
memscrub_wakeup(void *c)
{
	/*
	 * grab mutex to guarantee that our wakeup call
	 * arrives after we go to sleep -- so we can't sleep forever.
	 */
	mutex_enter(&memscrub_lock);
	cv_signal(&memscrub_cv);
	mutex_exit(&memscrub_lock);
}
/*
* provide an interface external to the memscrubber
* which will force the memscrub thread to run vs.
* waiting for the timeout, if one is set
*/
void
memscrub_run(void)
{
if (memscrub_tid) {
(void) untimeout(memscrub_tid);
memscrub_wakeup((void *)NULL);
}
}
/*
 * this calculation doesn't account for the time that the actual scan
 * consumes -- so we'd fall slightly behind schedule with this
 * interval; the error is very small.
 */
static uint_t
compute_interval_ticks(void)
{
	/*
	 * We use msp_safe and mpp_safe below to ensure somebody
	 * doesn't set memscrub_span_pages or memscrub_phys_pages
	 * to 0 on us.
	 */
	static uint_t msp_safe, mpp_safe;
	static uint_t interval_ticks, period_ticks;

	msp_safe = memscrub_span_pages;
	mpp_safe = memscrub_phys_pages;

	period_ticks = memscrub_period_sec * hz;
	interval_ticks = period_ticks;

	ASSERT(mutex_owned(&memscrub_lock));

	if ((msp_safe != 0) && (mpp_safe != 0)) {
		if (memscrub_phys_pages <= msp_safe) {
			interval_ticks = period_ticks;
		} else {
			interval_ticks = (period_ticks /
			    (mpp_safe / msp_safe));
		}
	}

	return (interval_ticks);
}
static void
memscrubber(void)
{
/*
* notify CPR of our existence
*/
if (memscrub_memlist == NULL) {
goto memscrub_exit;
}
for (;;) {
if (disable_memscrub)
break;
/*
* compute interval_ticks
*/
/*
* If the calculated sleep time is zero, and pause_memscrub
* has been set, make sure we sleep so that another thread
* can acquire memscrub_lock.
*/
if (interval_ticks == 0 && pause_memscrub) {
interval_ticks = hz;
}
/*
* And as a fail safe, under normal non-paused operation, do
* not allow the sleep time to be zero.
*/
if (interval_ticks == 0) {
if (!sleep_warn_printed) {
sleep_warn_printed = 1;
}
}
/*
* Did we just reach the end of memory? If we are at the
* end of memory, delay end of memory processing until
* pause_memscrub is not set.
*/
if (reached_end && !pause_memscrub) {
/*
* past deadline, start right away
*/
interval_ticks = 0;
} else {
/*
* we finished ahead of schedule.
* wait till previous deadline before re-start.
*/
}
reached_end = 0;
sleep_warn_printed = 0;
}
if (interval_ticks != 0) {
/*
* it is safe from our standpoint for CPR to
* suspend the system
*/
/*
* hit the snooze bar
*/
/*
* go to sleep
*/
/*
* at this point, no timeout should be set
*/
memscrub_tid = 0;
/*
 * we need to go to work and will be modifying
 * TTEs
*/
}
if (memscrub_phys_pages == 0) {
goto memscrub_exit;
}
if (!pause_memscrub) {
if (paused_message) {
paused_message = 0;
if (memscrub_verbose)
"resuming");
}
if (read_all_memscrub) {
if (memscrub_verbose)
"reading all memory per request");
reached_end = 0;
while (!reached_end) {
if (disable_memscrub)
break;
}
read_all_memscrub = 0;
}
/*
* read 1 span
*/
if (disable_memscrub)
break;
/*
* determine physical address range
*/
&pages);
address);
}
if (pause_memscrub && !paused_message) {
paused_message = 1;
if (memscrub_verbose)
}
}
thread_exit();
/* NOTREACHED */
}
/*
* condition address and size
* such that they span legal physical addresses.
*
 * when appropriate, the address will be rounded up to the start of
 * the next struct memlist, and pages will be truncated at the end
 * of that memlist's range.
*
* returns 1 if reached end of list, else returns 0.
*/
static int
memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp)
{
int reached_end = 0;
/*
* find memlist struct that contains addrp
* assumes memlist is sorted by ascending address.
*/
/*
* if before this chunk, round up to beginning
*/
break;
}
/*
* if before end of chunk, then we found it
*/
break;
/* else go to next struct memlist */
}
/*
* if we hit end of list, start at beginning
*/
}
/*
* now we have legal address, and its mlp, condition bytes
*/
if (bytes > bytes_remaining)
/*
* will this span take us to end of list?
*/
reached_end = 1;
/* return values */
return (reached_end);
}
/*
* add a span to the memscrub list
* add to memscrub_phys_pages
*/
int
memscrub_add_span(pfn_t pfn, pgcnt_t pages)
{
#ifdef MEMSCRUB_DEBUG
#endif /* MEMSCRUB_DEBUG */
int retval;
#ifdef MEMSCRUB_DEBUG
#endif /* MEMSCRUB_DEBUG */
#ifdef MEMSCRUB_DEBUG
#endif /* MEMSCRUB_DEBUG */
return (retval);
}
static int
memscrub_add_span_gen(
	pfn_t pfn,
	pgcnt_t pages,
	struct memlist **list,
	uint_t *npgs)
{
int retval = 0;
/*
* allocate a new struct memlist
*/
retval = -1;
goto add_done;
}
/*
* first insert
*/
goto add_done;
}
/*
* insert into sorted list
*/
continue;
/*
* else insert here
*/
/*
* prepend to next
*/
goto add_done;
}
/*
* append to next
*/
/*
* don't overlap with next->ml_next
*/
retval = -1;
goto add_done;
}
/*
* concatenate next and next->ml_next
*/
sizeof (struct memlist));
goto add_done;
}
}
goto add_done;
}
/* don't overlap with next */
retval = -1;
goto add_done;
}
/*
* insert before next
*/
} else {
}
goto add_done;
} /* end for */
	/*
	 * end of list, prev is valid and next is NULL
	 */

add_done:

	if (retval != -1)
		*npgs += pages;

	return (retval);
}
/*
* delete a span from the memscrub list
* subtract from memscrub_phys_pages
*/
int
memscrub_delete_span(pfn_t pfn, pgcnt_t pages)
{
int retval = 0;
#ifdef MEMSCRUB_DEBUG
#endif /* MEMSCRUB_DEBUG */
/*
* find struct memlist containing page
*/
break;
}
/*
* if start address not in list
*/
retval = -1;
goto delete_done;
}
/*
* error if size goes off end of this struct memlist
*/
retval = -1;
goto delete_done;
}
/*
* pages at beginning of struct memlist
*/
/*
* if start & size match, delete from list
*/
if (next == memscrub_memlist)
} else {
/*
* increment start address by bytes
*/
}
goto delete_done;
}
/*
* pages at end of struct memlist
*/
/*
* decrement size by bytes
*/
goto delete_done;
}
/*
* delete a span in the middle of the struct memlist
*/
{
/*
* create a new struct memlist
*/
retval = -1;
goto delete_done;
}
/*
* existing struct memlist gets address
* and size up to pfn
*/
/*
* new struct memlist gets address starting
* after pfn, until end
*/
/*
* link in new memlist after old
*/
}
delete_done:
	if (retval != -1) {
		memscrub_phys_pages -= pages;
		if (memscrub_phys_pages == 0)
			disable_memscrub = 1;
	}
#ifdef MEMSCRUB_DEBUG
#endif /* MEMSCRUB_DEBUG */
return (retval);
}
static void
memscrub_scan(uint_t blks, ms_paddr_t src)
{
	ms_paddr_t paddr = src;
	caddr_t vaddr;
	uint_t psz, bpp, pgsread;
	int scan_mmu_pagesize = 0;
	int retired_pages = 0;

	pgsread = 0;
if (memscrub_page_retire_span_list != NULL) {
if (memscrub_page_retire_span_search(src)) {
/* retired pages in current span */
scan_mmu_pagesize = 1;
}
}
#ifdef MEMSCRUB_DEBUG
#endif /* MEMSCRUB_DEBUG */
while (blks != 0) {
/* Ensure the PA is properly aligned */
(blks >= MEMSCRUB_BPP4M)) {
(blks >= MEMSCRUB_BPP512K)) {
(blks >= MEMSCRUB_BPP64K)) {
psz = MMU_PAGESIZE;
bpp = MEMSCRUB_BPP;
} else {
if (memscrub_verbose) {
"non-page aligned block starting at 0x%"
}
return;
}
#ifdef MEMSCRUB_DEBUG
#endif /* MEMSCRUB_DEBUG */
/*
* MEMSCRUBBASE is a 4MB aligned page in the
* kernel so that we can quickly map the PA
* to a VA for the block loads performed in
* memscrub_read.
*/
/*
* Can't allow the memscrubber to migrate across CPUs as
* we need to know whether CEEN is enabled for the current
* CPU to enable us to scrub the memory. Don't use
* kpreempt_disable as the time we take to scan a span (even
* without cpu_check_ce having to manually cpu_check_block)
 * is too long to hold a higher priority thread (e.g., RT)
* off cpu.
*/
/*
* Protect read scrub from async faults. For now, we simply
* maintain a count of such faults caught.
*/
/*
* Check if CEs require logging
*/
no_trap();
} else {
no_trap();
			/*
			 * Got an async error.
			 * Try rescanning it at MMU_PAGESIZE
			 * granularity if we were trying to
			 * read at a larger page size.
			 * This is to ensure we continue to
			 * scan the rest of the span.
			 * Or we may already be scanning at
			 * MMU_PAGESIZE granularity to avoid
			 * reading retired page memory when
			 * scan_mmu_pagesize is set.
			 */
int tmp = 0;
/* Don't scrub retired pages */
== 0) {
vaddr += MMU_PAGESIZE;
paddr += MMU_PAGESIZE;
continue;
}
no_trap();
} else {
no_trap();
}
vaddr += MMU_PAGESIZE;
paddr += MMU_PAGESIZE;
}
}
}
pgsread++;
}
	/*
	 * If we just finished scanning at MMU_PAGESIZE granularity but
	 * found no retired pages, delete the span from the global list.
	 */
if (scan_mmu_pagesize && retired_pages == 0)
	/*
	 * A CE/UE was encountered on a retired page within this span.
	 * Add the span to the global list to avoid reading it further.
	 */
if (add_to_page_retire_list) {
}
if (memscrub_verbose) {
}
}
/*
 * Called by cpu_async_log_err() when a memscrub read causes
 * a CE/UE on a retired page.
 */
void
memscrub_induced_error(void)
{
	add_to_page_retire_list = 1;
}
/*
 * Called by page_retire() when toxic pages cannot be retired
 * immediately and are scheduled for retire. Memscrubber stops
 * scrubbing them to avoid further CE/UEs.
 */
void
memscrub_notify(ms_paddr_t pa)
{
	mutex_enter(&memscrub_lock);
	if (!memscrub_page_retire_span_search(pa))
		memscrub_page_retire_span_add(pa);
	mutex_exit(&memscrub_lock);
}
/*
* Called by memscrub_scan() and memscrub_notify().
*/
static void
memscrub_page_retire_span_add(ms_paddr_t pa)
{
#ifdef MEMSCRUB_DEBUG
" retired page/s not tracked.\n");
#endif /* MEMSCRUB_DEBUG */
return;
}
}
/*
* Called by memscrub_scan().
* pa: physical address of span to be removed from global list.
*/
static void
memscrub_page_retire_span_delete(ms_paddr_t pa)
{
return;
}
while (next_span) {
sizeof (memscrub_page_retire_span_t));
return;
}
}
}
/*
* Called by memscrub_scan() and memscrub_notify().
* pa: physical address of span to be searched in global list.
*/
static int
memscrub_page_retire_span_search(ms_paddr_t pa)
{
while (next_span) {
return (1);
}
return (0);
}
/*
 * Called from new_memscrub() as a result of memory delete.
 * Uses page_numtopp_nolock() to determine whether we still have a valid PA.
 */
static void
memscrub_page_retire_span_list_update(void)
{
if (memscrub_page_retire_span_list == NULL)
return;
while (cur) {
if (cur == memscrub_page_retire_span_list) {
sizeof (memscrub_page_retire_span_t));
} else {
sizeof (memscrub_page_retire_span_t));
}
} else {
}
}
}
/*
 * The memory add/delete callbacks do not pass in the affected
 * page ranges. The phys_install list has been updated though, so
 * create a new scrub list from it.
 */
static int
new_memscrub(int update_page_retire_list)
{
/*
* copy phys_install to memscrub_memlist
*/
npgs = 0;
while (list) {
}
return (-1);
}
}
while (old_list) {
}
return (0);
}
/*ARGSUSED*/
static void
memscrub_mem_config_post_add(
	void *arg,
	pgcnt_t delta_pages)
{
/*
* We increment pause_memscrub before entering new_memscrub(). This
* will force the memscrubber to sleep, allowing the DR callback
* thread to acquire memscrub_lock in new_memscrub(). The use of
* atomic_add_32() allows concurrent memory DR operations to use the
* callbacks safely.
*/
ASSERT(pause_memscrub != 0);
/*
* "Don't care" if we are not scrubbing new memory.
*/
(void) new_memscrub(0); /* retain page retire list */
/* Restore the pause setting. */
}
/*ARGSUSED*/
static int
memscrub_mem_config_pre_del(
	void *arg,
	pgcnt_t delta_pages)
{
/* Nothing to do. */
return (0);
}
/*ARGSUSED*/
static void
memscrub_mem_config_post_del(
	void *arg,
	pgcnt_t delta_pages,
	int cancelled)
{
/*
* We increment pause_memscrub before entering new_memscrub(). This
* will force the memscrubber to sleep, allowing the DR callback
* thread to acquire memscrub_lock in new_memscrub(). The use of
* atomic_add_32() allows concurrent memory DR operations to use the
* callbacks safely.
*/
ASSERT(pause_memscrub != 0);
/*
* Must stop scrubbing deleted memory as it may be disconnected.
*/
disable_memscrub = 1;
}
/* Restore the pause setting. */
}
static kphysm_setup_vector_t memscrub_mem_config_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	memscrub_mem_config_post_add,
	memscrub_mem_config_pre_del,
	memscrub_mem_config_post_del,
};
static void
memscrub_init_mem_config(void)
{
	int ret;

	ret = kphysm_setup_func_register(&memscrub_mem_config_vec,
	    (void *)NULL);

	ASSERT(ret == 0);
}
static void
memscrub_uninit_mem_config(void)
{
	/* This call is OK if the register call was not done. */
	(void) kphysm_setup_func_unregister(&memscrub_mem_config_vec,
	    (void *)NULL);
}