memory.c revision af4c679f647cf088543c762e33d41a3ac52cfa14
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <mdb/mdb_modapi.h>
#include <sys/balloon_impl.h>
#endif
/*
* Page walker.
* By default, this will walk all pages in the system. If given an
* address, it will walk all pages belonging to the vnode at that
* address.
*/
/*
* page_walk_data
*
* pw_hashleft is set to -1 when walking a vnode's pages, and holds the
* number of hash locations remaining in the page hash table when
* walking all pages.
*
* The astute reader will notice that pw_hashloc is only used when
* reading all pages (to hold a pointer to our location in the page
* hash table), and that pw_first is only used when reading the pages
* belonging to a particular vnode (to hold a pointer to the first
* page). While these could be combined to be a single pointer, they
* are left separate for clarity.
*/
typedef struct page_walk_data {
long pw_hashleft;
void **pw_hashloc;
int
{
void **ptr;
/*
* Walk all pages
*/
mdb_warn("page_hash, page_hashsz not found or invalid");
return (WALK_ERR);
}
/*
* Since we are walking all pages, initialize hashleft
* to be the remaining number of entries in the page
* hash. hashloc is set to the start of the page hash
* table. Setting the walk address to 0 indicates that
* we aren't currently following a hash chain, and that
* we need to scan the page hash table for a page.
*/
} else {
/*
* Walk just this vnode
*/
mdb_warn("unable to read vnode_t at %#lx",
return (WALK_ERR);
}
/*
* We set hashleft to -1 to indicate that we are
* walking a vnode, and initialize first to 0 (it is
* used to terminate the walk, so it must not be set
* until after we have walked the first page). The
* walk address is set to the first page.
*/
}
return (WALK_NEXT);
}
int
{
if (pwd->pw_hashleft < 0) {
/* We're walking a vnode's pages */
/*
* If we don't have any pages to walk, we have come
* back around to the first one (we finished), or we
* can't read the page we're looking at, we are done.
*/
return (WALK_DONE);
return (WALK_ERR);
}
/*
* Set the walk address to the next page, and if the
* first page hasn't been set yet (i.e. we are on the
* first page), set it.
*/
} else if (pwd->pw_hashleft > 0) {
/* We're walking all pages */
/*
* If pp (the walk address) is NULL, we scan through
* the page hash table until we find a page.
*/
/*
* Iterate through the page hash table until we
* find a page or reach the end.
*/
do {
mdb_warn("unable to read from %#p",
pwd->pw_hashloc);
return (WALK_ERR);
}
pwd->pw_hashleft--;
pwd->pw_hashloc++;
/*
* We've reached the end; exit.
*/
return (WALK_DONE);
}
return (WALK_ERR);
}
/*
* Set the walk address to the next page.
*/
} else {
/* We've finished walking all pages. */
return (WALK_DONE);
}
}
void
{
}
/*
* allpages walks all pages in the system in the order they appear in
* the memseg structure
*/
#define PAGE_BUFFER 128
int
{
mdb_warn("allpages only supports global walks.\n");
return (WALK_ERR);
}
mdb_warn("couldn't walk 'memseg'");
return (WALK_ERR);
}
return (WALK_NEXT);
}
int
{
while (pg_num > 0) {
return (WALK_ERR);
}
for (i = 0; i < pg_read; i++) {
return (ret);
}
}
return (WALK_NEXT);
}
void
{
}
/*
* Hash table + LRU queue.
* This table is used to cache recently read vnodes for the memstat
* command, to reduce the number of mdb_vread calls. This greatly
* speeds the memstat command on live, large CPU count systems.
*/
/*
 * Candidate sizes for the vnode hash table. Per the notes on the internal
 * "-s size" option later in this file, VN_LARGE is tried by default and
 * VN_SMALL is the fallback when allocation fails. Both values are prime,
 * which that same comment recommends for hash table efficiency.
 */
#define VN_SMALL 401
#define VN_LARGE 10007
struct vn_htable_list {
};
/*
* vn_q_first -> points to the head of the queue: the vnode that was most
* recently used
* vn_q_last -> points to the oldest used vnode, and is freed once a new
* vnode is read.
* vn_htable -> hash table
* vn_htable_buf -> contains htable objects
* vn_htable_size -> total number of items in the hash table
* vn_htable_buckets -> number of buckets in the hash table
*/
/* State for the vnode cache: a hash table combined with an LRU queue. */
typedef struct vn_htable {
struct vn_htable_list *vn_q_first;	/* head of queue: most recently used */
struct vn_htable_list *vn_q_last;	/* tail of queue: oldest, reused next */
struct vn_htable_list **vn_htable;	/* the hash table itself */
struct vn_htable_list *vn_htable_buf;	/* storage for the htable objects */
int vn_htable_size;			/* total number of items in the table */
int vn_htable_buckets;			/* number of buckets in the table */
} vn_htable_t;
/* allocate memory, initialize hash table and LRU queue */
static void
{
int i;
}
}
}
/*
* Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
* The function tries to find needed information in the following order:
*
* 1. check if ptr is the first in queue
* 2. check if ptr is in hash table (if so move it to the top of queue)
* 3. do mdb_vread, remove last queue item from queue and hash table.
* Insert new information to freed object, and put this object in to the
* top of the queue.
*/
static int
{
int hkey;
/* 1. vnode ptr is the first in queue, just get v_flag and return */
return (0);
}
/* 2. search the hash table for this ptr */
/* 3. if hent is NULL, we did not find in hash table, do mdb_vread */
return (-1);
}
/* we will insert read data into the last element in queue */
/* remove last hp->vn_q_last object from hash table */
}
/* insert data into new free object */
/* insert new object into hash table */
}
/* Remove from queue. hent is not first, vn_q_prev is not NULL */
else
/* Add to the front of queue */
/* Set v_flag in vnode pointer from hent */
return (0);
}
/* Summary statistics of pages */
typedef struct memstat {
} memstat_t;
/*
* Summarize pages by type and update stat information
*/
/* ARGSUSED */
static int
{
return (WALK_NEXT);
stats->ms_zfs_data++;
stats->ms_cachelist++;
return (WALK_ERR);
else if (IS_SWAPFSVP(vp))
else
return (WALK_NEXT);
}
/* ARGSUSED */
int
{
#endif
/*
* -s size, is an internal option. It specifies the size of vn_htable.
* Hash table size is set in the following order:
* If user has specified the size that is larger than VN_LARGE: try it,
* but if malloc failed default to VN_SMALL. Otherwise try VN_LARGE, if
* failed to allocate default to VN_SMALL.
* For a better efficiency of hash table it is highly recommended to
* set size to a prime number.
*/
return (DCMD_USAGE);
/* Initialize vnode hash list and queue */
/* Grab base page size */
mdb_warn("unable to read _pagesize");
return (DCMD_ERR);
}
/* Total physical memory */
mdb_warn("unable to read total_pages");
return (DCMD_ERR);
}
/* Artificially limited memory */
mdb_warn("unable to read physmem");
return (DCMD_ERR);
}
/* read kernel vnode array pointer */
mdb_warn("unable to read kvps");
return (DCMD_ERR);
}
/*
* Read the zio vnode pointer.
*/
/*
* If physmem != total_pages, then the administrator has limited the
* number of pages available in the system. Excluded pages are
* associated with the unused pages vnode. Read this vnode so the
* pages can be excluded in the page accounting.
*/
mdb_warn("unable to read unused_pages_vp");
return (DCMD_ERR);
}
/* walk all pages, collect statistics */
&stats) == -1) {
mdb_warn("can't walk memseg");
return (DCMD_ERR);
}
((physmem) * 10)))
mdb_printf("Page Summary Pages MB"
" %%Tot\n");
mdb_printf("------------ ---------------- ----------------"
" ----\n");
mdb_printf("Kernel %16llu %16llu %3lu%%\n",
if (stats.ms_zfs_data != 0)
mdb_printf("ZFS File Data %16llu %16llu %3lu%%\n",
mdb_printf("Anon %16llu %16llu %3lu%%\n",
mdb_printf("Exec and libs %16llu %16llu %3lu%%\n",
mdb_printf("Page cache %16llu %16llu %3lu%%\n",
mdb_printf("Free (cachelist) %16llu %16llu %3lu%%\n",
/*
* occasionally, we double count pages above. To avoid printing
* absurdly large values for freemem, we clamp it at zero.
*/
else
freemem = 0;
/* Are we running under Xen? If so, get balloon memory usage. */
else
freemem = 0;
}
#endif
if (bln_size != -1) {
mdb_printf("Balloon %16lu %16llu %3lu%%\n",
}
#endif
mdb_printf("\nTotal %16lu %16lu\n",
if (physmem != total_pages) {
mdb_printf("Physical %16lu %16lu\n",
}
return (DCMD_OK);
}
int
{
page_t p;
if (!(flags & DCMD_ADDRSPEC)) {
mdb_warn("can't walk pages");
return (DCMD_ERR);
}
return (DCMD_OK);
}
if (DCMD_HDRSPEC(flags)) {
mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
"PAGE", "VNODE", "OFFSET", "SELOCK",
"LCT", "COW", "IO", "FS", "ST");
}
return (DCMD_ERR);
}
mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
return (DCMD_OK);
}
int
{
void *ptr;
mdb_warn("swapinfo not found or invalid");
return (WALK_ERR);
}
return (WALK_NEXT);
}
int
{
return (WALK_DONE);
return (WALK_ERR);
}
}
int
{
char *name;
if (!(flags & DCMD_ADDRSPEC)) {
mdb_warn("can't walk swapinfo");
return (DCMD_ERR);
}
return (DCMD_OK);
}
if (DCMD_HDRSPEC(flags)) {
mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
"ADDR", "VNODE", "PAGES", "FREE", "NAME");
}
return (DCMD_ERR);
}
name = "*error*";
mdb_printf("%0?lx %?p %9d %9d %s\n",
return (DCMD_OK);
}
int
{
return (WALK_DONE);
return (WALK_ERR);
}
}
int
{
if (!(flags & DCMD_ADDRSPEC)) {
int i;
static const char *lists[] = {
"phys_install",
"phys_avail",
"virt_avail"
};
return (DCMD_USAGE);
if (!list)
list = 1;
if (!(list & 1))
continue;
return (DCMD_ERR);
}
ptr) == -1) {
mdb_warn("can't walk memlist");
return (DCMD_ERR);
}
}
return (DCMD_OK);
}
if (DCMD_HDRSPEC(flags))
return (DCMD_ERR);
}
return (DCMD_OK);
}