memory.c revision 56f33205c9ed776c3c909e07d52e94610a675740
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#include <sys/balloon_impl.h>
#endif
#include "avl.h"
/*
* Page walker.
* By default, this will walk all pages in the system. If given an
* address, it will walk all pages belonging to the vnode at that
* address.
*/
/*
* page_walk_data
*
* pw_hashleft is set to -1 when walking a vnode's pages, and holds the
* number of hash locations remaining in the page hash table when
* walking all pages.
*
* The astute reader will notice that pw_hashloc is only used when
* reading all pages (to hold a pointer to our location in the page
* hash table), and that pw_first is only used when reading the pages
* belonging to a particular vnode (to hold a pointer to the first
* page). While these could be combined to be a single pointer, they
* are left separate for clarity.
*/
typedef struct page_walk_data {
long pw_hashleft;
void **pw_hashloc;
int
{
void **ptr;
/*
* Walk all pages
*/
mdb_warn("page_hash, page_hashsz not found or invalid");
return (WALK_ERR);
}
/*
* Since we are walking all pages, initialize hashleft
* to be the remaining number of entries in the page
* hash. hashloc is set to the start of the page hash
* table. Setting the walk address to 0 indicates that
* we aren't currently following a hash chain, and that
* we need to scan the page hash table for a page.
*/
} else {
/*
* Walk just this vnode
*/
mdb_warn("unable to read vnode_t at %#lx",
return (WALK_ERR);
}
/*
* We set hashleft to -1 to indicate that we are
* walking a vnode, and initialize first to 0 (it is
* used to terminate the walk, so it must not be set
* until after we have walked the first page). The
* walk address is set to the first page.
*/
}
return (WALK_NEXT);
}
int
{
if (pwd->pw_hashleft < 0) {
/* We're walking a vnode's pages */
/*
* If we don't have any pages to walk, we have come
* back around to the first one (we finished), or we
* can't read the page we're looking at, we are done.
*/
return (WALK_DONE);
return (WALK_ERR);
}
/*
* Set the walk address to the next page, and if the
* first page hasn't been set yet (i.e. we are on the
* first page), set it.
*/
} else if (pwd->pw_hashleft > 0) {
/* We're walking all pages */
/*
* If pp (the walk address) is NULL, we scan through
* the page hash table until we find a page.
*/
/*
* Iterate through the page hash table until we
* find a page or reach the end.
*/
do {
mdb_warn("unable to read from %#p",
pwd->pw_hashloc);
return (WALK_ERR);
}
pwd->pw_hashleft--;
pwd->pw_hashloc++;
/*
* We've reached the end; exit.
*/
return (WALK_DONE);
}
return (WALK_ERR);
}
/*
* Set the walk address to the next page.
*/
} else {
/* We've finished walking all pages. */
return (WALK_DONE);
}
}
void
{
}
/*
* allpages walks all pages in the system in the order they appear in
* the memseg structure.
*/
#define PAGE_BUFFER 128
int
{
mdb_warn("allpages only supports global walks.\n");
return (WALK_ERR);
}
mdb_warn("couldn't walk 'memseg'");
return (WALK_ERR);
}
return (WALK_NEXT);
}
int
{
while (pg_num > 0) {
return (WALK_ERR);
}
for (i = 0; i < pg_read; i++) {
return (ret);
}
}
return (WALK_NEXT);
}
void
{
}
/*
* Hash table + LRU queue.
* This table is used to cache recently read vnodes for the memstat
* command, to reduce the number of mdb_vread calls. This greatly
* speeds the memstat command on live, large CPU count systems.
*/
/*
* Candidate sizes for the memstat vnode hash table. Per the -s option
* handling in memstat, VN_LARGE is tried first (unless the user asked
* for something larger) and VN_SMALL is the fallback when allocation
* fails. Both values are prime, which is the recommended choice for
* the table size.
*/
#define VN_SMALL 401
#define VN_LARGE 10007
struct vn_htable_list {
};
/*
* vn_q_first -> points to the head of queue: the vnode that was most
* recently used
* vn_q_last -> points to the oldest used vnode, and is freed once a new
* vnode is read.
* vn_htable -> hash table
* vn_htable_buf -> contains htable objects
* vn_htable_size -> total number of items in the hash table
* vn_htable_buckets -> number of buckets in the hash table
*/
/*
* Combined hash table and LRU queue used to cache recently read vnodes
* for the memstat command, reducing the number of mdb_vread calls.
*/
typedef struct vn_htable {
/* head of the LRU queue: the most recently used vnode */
struct vn_htable_list *vn_q_first;
/* tail of the LRU queue: the oldest vnode, freed once a new vnode is read */
struct vn_htable_list *vn_q_last;
/* the hash table itself */
struct vn_htable_list **vn_htable;
/* buffer containing the hash table objects */
struct vn_htable_list *vn_htable_buf;
/* total number of items in the hash table */
int vn_htable_size;
/* number of buckets in the hash table */
int vn_htable_buckets;
} vn_htable_t;
/* allocate memory, initialize hash table and LRU queue */
static void
{
int i;
}
}
}
/*
* Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
* The function tries to find needed information in the following order:
*
* 1. check if ptr is the first in queue
* 2. check if ptr is in hash table (if so move it to the top of queue)
* 3. do mdb_vread, remove last queue item from queue and hash table.
* Insert the new information into the freed object, and put this object
* at the top of the queue.
*/
static int
{
int hkey;
/* 1. vnode ptr is the first in queue, just get v_flag and return */
return (0);
}
/* 2. search the hash table for this ptr */
/* 3. if hent is NULL, we did not find in hash table, do mdb_vread */
return (-1);
}
/* we will insert read data into the last element in queue */
/* remove last hp->vn_q_last object from hash table */
}
/* insert data into new free object */
/* insert new object into hash table */
}
/* Remove from queue. hent is not first, vn_q_prev is not NULL */
else
/* Add to the front of queue */
/* Set v_flag in vnode pointer from hent */
return (0);
}
/* Summary statistics of pages */
typedef struct memstat {
} memstat_t;
/*
* Summarize pages by type and update stat information
*/
/* ARGSUSED */
static int
{
return (WALK_NEXT);
stats->ms_zfs_data++;
stats->ms_cachelist++;
return (WALK_ERR);
else if (IS_SWAPFSVP(vp))
else
return (WALK_NEXT);
}
/* ARGSUSED */
int
{
#endif
/*
* -s size is an internal option. It specifies the size of vn_htable.
* The hash table size is chosen as follows:
* If the user has specified a size larger than VN_LARGE, try it,
* but fall back to VN_SMALL if the allocation fails. Otherwise try
* VN_LARGE, and fall back to VN_SMALL if the allocation fails.
* For better hash table efficiency, it is highly recommended to
* set the size to a prime number.
*/
return (DCMD_USAGE);
/* Initialize vnode hash list and queue */
/* Total physical memory */
mdb_warn("unable to read total_pages");
return (DCMD_ERR);
}
/* Artificially limited memory */
mdb_warn("unable to read physmem");
return (DCMD_ERR);
}
/* read kernel vnode array pointer */
mdb_warn("unable to read kvps");
return (DCMD_ERR);
}
/*
* Read the zio vnode pointer.
*/
/*
* If physmem != total_pages, then the administrator has limited the
* number of pages available in the system. Excluded pages are
* associated with the unused pages vnode. Read this vnode so the
* pages can be excluded in the page accounting.
*/
mdb_warn("unable to read unused_pages_vp");
return (DCMD_ERR);
}
/* walk all pages, collect statistics */
&stats) == -1) {
mdb_warn("can't walk memseg");
return (DCMD_ERR);
}
((physmem) * 10)))
mdb_printf("Page Summary Pages MB"
" %%Tot\n");
mdb_printf("------------ ---------------- ----------------"
" ----\n");
mdb_printf("Kernel %16llu %16llu %3lu%%\n",
if (stats.ms_zfs_data != 0)
mdb_printf("ZFS File Data %16llu %16llu %3lu%%\n",
mdb_printf("Anon %16llu %16llu %3lu%%\n",
mdb_printf("Exec and libs %16llu %16llu %3lu%%\n",
mdb_printf("Page cache %16llu %16llu %3lu%%\n",
mdb_printf("Free (cachelist) %16llu %16llu %3lu%%\n",
/*
* occasionally, we double count pages above. To avoid printing
* absurdly large values for freemem, we clamp it at zero.
*/
else
freemem = 0;
/* Are we running under Xen? If so, get balloon memory usage. */
else
freemem = 0;
}
#endif
if (bln_size != -1) {
mdb_printf("Balloon %16lu %16llu %3lu%%\n",
}
#endif
mdb_printf("\nTotal %16lu %16lu\n",
if (physmem != total_pages) {
mdb_printf("Physical %16lu %16lu\n",
}
return (DCMD_OK);
}
/*
* Help text for the ::pagelookup dcmd. The message explains that the
* dcmd finds the page named by a { vp, offset } pair and lists the
* three accepted invocation forms: both values given as options, or
* either one supplied as the dcmd address with the other as an option
* (the forms intended for use in pipelines).
*/
void
pagelookup_help(void)
{
"Finds the page with name { %<b>vp%</b>, %<b>offset%</b> }.\n"
"\n"
"Can be invoked three different ways:\n\n"
" ::pagelookup -v %<b>vp%</b> -o %<b>offset%</b>\n"
" %<b>vp%</b>::pagelookup -o %<b>offset%</b>\n"
" %<b>offset%</b>::pagelookup -v %<b>vp%</b>\n"
"\n"
"The latter two forms are useful in pipelines.\n");
}
int
{
int usedaddr = 0;
0) != argc) {
return (DCMD_USAGE);
}
"pagelookup: at least one of -v vp or -o offset "
"required.\n");
return (DCMD_USAGE);
}
usedaddr = 1;
offset = mdb_get_dot();
usedaddr = 1;
}
mdb_warn("pagelookup: address required\n");
return (DCMD_USAGE);
}
"pagelookup: address specified when both -v and -o were "
"passed");
return (DCMD_USAGE);
}
if (pageaddr == 0) {
mdb_warn("pagelookup: no page for {vp = %p, offset = %llp)\n",
return (DCMD_OK);
}
return (DCMD_OK);
}
/*ARGSUSED*/
int
{
return (DCMD_USAGE);
}
if (pp == 0) {
return (DCMD_ERR);
}
if (flags & DCMD_PIPE_OUT) {
} else {
}
return (DCMD_OK);
}
int
{
page_t p;
if (!(flags & DCMD_ADDRSPEC)) {
mdb_warn("can't walk pages");
return (DCMD_ERR);
}
return (DCMD_OK);
}
if (DCMD_HDRSPEC(flags)) {
mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
"PAGE", "VNODE", "OFFSET", "SELOCK",
"LCT", "COW", "IO", "FS", "ST");
}
return (DCMD_ERR);
}
mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
return (DCMD_OK);
}
int
{
void *ptr;
mdb_warn("swapinfo not found or invalid");
return (WALK_ERR);
}
return (WALK_NEXT);
}
int
{
return (WALK_DONE);
return (WALK_ERR);
}
}
int
{
char *name;
if (!(flags & DCMD_ADDRSPEC)) {
mdb_warn("can't walk swapinfo");
return (DCMD_ERR);
}
return (DCMD_OK);
}
if (DCMD_HDRSPEC(flags)) {
mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
"ADDR", "VNODE", "PAGES", "FREE", "NAME");
}
return (DCMD_ERR);
}
name = "*error*";
mdb_printf("%0?lx %?p %9d %9d %s\n",
return (DCMD_OK);
}
int
{
return (WALK_DONE);
return (WALK_ERR);
}
}
int
{
if (!(flags & DCMD_ADDRSPEC)) {
int i;
static const char *lists[] = {
"phys_install",
"phys_avail",
"virt_avail"
};
return (DCMD_USAGE);
if (!list)
list = 1;
if (!(list & 1))
continue;
return (DCMD_ERR);
}
ptr) == -1) {
mdb_warn("can't walk memlist");
return (DCMD_ERR);
}
}
return (DCMD_OK);
}
if (DCMD_HDRSPEC(flags))
return (DCMD_ERR);
}
return (DCMD_OK);
}
int
{
mdb_warn("seg walk must begin at struct as *\n");
return (WALK_ERR);
}
/*
* this is really just a wrapper to AVL tree walk
*/
return (avl_walk_init(wsp));
}
/*ARGSUSED*/
int
{
struct seg s;
if (argc != 0)
return (DCMD_USAGE);
mdb_printf("%<u>%?s %?s %?s %?s %s%</u>\n",
"SEG", "BASE", "SIZE", "DATA", "OPS");
}
return (DCMD_ERR);
}
mdb_printf("%?p %?p %?lx %?p %a\n",
return (DCMD_OK);
}
/*ARGSUSED*/
static int
{
(*nres)++;
return (WALK_NEXT);
}
static int
{
struct segvn_data svn;
int nres = 0;
goto drive_on;
}
/*
* We've got an amp for this segment; walk through
* the amp, and determine mappings.
*/
char buf[29];
} else
mdb_printf(" [ anon ]");
}
mdb_printf("\n");
return (WALK_NEXT);
}
static int
{
struct segvn_data svn;
} else {
mdb_printf(" [ anon ]");
}
}
mdb_printf("\n");
return (WALK_NEXT);
}
/*ARGSUSED*/
int
{
if (!(flags & DCMD_ADDRSPEC))
return (DCMD_USAGE);
return (DCMD_USAGE);
return (DCMD_ERR);
}
else
if (quick) {
mdb_printf("VNODE\n");
} else {
}
return (DCMD_ERR);
}
return (DCMD_OK);
}
typedef struct anon_walk_data {
int aw_nlevone;
int aw_levone_ndx;
int aw_levtwo_ndx;
int
{
mdb_warn("anon walk doesn't support global walks\n");
return (WALK_ERR);
}
return (WALK_ERR);
}
return (WALK_ERR);
}
} else {
aw->aw_nlevone =
}
aw->aw_levone_ndx = 0;
aw->aw_levtwo_ndx = 0;
aw->aw_levone_ndx++;
mdb_warn("corrupt anon; couldn't"
"find ptr to lev two map");
goto out;
}
}
}
out:
return (0);
}
int
{
int status;
/*
* Once we've walked through level one, we're done.
*/
return (WALK_DONE);
aw->aw_levone_ndx++;
} else {
aw->aw_levtwo_ndx++;
aw->aw_levtwo_ndx = 0;
do {
aw->aw_levone_ndx++;
return (WALK_DONE);
sizeof (uintptr_t),
}
}
} else
goto again;
return (status);
}
void
{
}
/*
* Grumble, grumble.
*/
int
{
long smd_hashmsk;
int hash;
if (!(flags & DCMD_ADDRSPEC))
return (DCMD_USAGE);
mdb_warn("failed to read smd_hashmsk");
return (DCMD_ERR);
}
mdb_warn("failed to read smd_hash");
return (DCMD_ERR);
}
mdb_warn("failed to read smd_hash");
return (DCMD_ERR);
}
mdb_warn("failed to read segkmap");
return (DCMD_ERR);
}
return (DCMD_ERR);
}
if (argc != 0) {
else
}
mdb_warn("couldn't read smap at %p",
return (DCMD_ERR);
}
do {
return (DCMD_ERR);
}
mdb_printf("vnode %p, offs %p is smap %p, vaddr %p\n",
return (DCMD_OK);
}
return (DCMD_OK);
}
/*ARGSUSED*/
int
{
struct segmap_data sd;
if (!(flags & DCMD_ADDRSPEC))
return (DCMD_USAGE);
mdb_warn("failed to read segkmap");
return (DCMD_ERR);
}
return (DCMD_ERR);
}
return (DCMD_ERR);
}
return (DCMD_OK);
}