/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2015 Joyent, Inc.
*/
#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#if defined(__i386) || defined(__amd64)
#include <sys/balloon_impl.h>
#endif
#include "avl.h"
#include "memory.h"
/*
* Page walker.
* By default, this will walk all pages in the system. If given an
* address, it will walk all pages belonging to the vnode at that
* address.
*/
/*
* page_walk_data
*
* pw_hashleft is set to -1 when walking a vnode's pages, and holds the
* number of hash locations remaining in the page hash table when
* walking all pages.
*
* The astute reader will notice that pw_hashloc is only used when
* reading all pages (to hold a pointer to our location in the page
* hash table), and that pw_first is only used when reading the pages
* belonging to a particular vnode (to hold a pointer to the first
* page). While these could be combined to be a single pointer, they
* are left separate for clarity.
*/
typedef struct page_walk_data {
long pw_hashleft;
void **pw_hashloc;
int
{
void **ptr;
/*
* Walk all pages
*/
mdb_warn("page_hash, page_hashsz not found or invalid");
return (WALK_ERR);
}
/*
* Since we are walking all pages, initialize hashleft
* to be the remaining number of entries in the page
* hash. hashloc is set to the start of the page hash
* table. Setting the walk address to 0 indicates that
* we aren't currently following a hash chain, and that
* we need to scan the page hash table for a page.
*/
} else {
/*
* Walk just this vnode
*/
mdb_warn("unable to read vnode_t at %#lx",
return (WALK_ERR);
}
/*
* We set hashleft to -1 to indicate that we are
* walking a vnode, and initialize first to 0 (it is
* used to terminate the walk, so it must not be set
* until after we have walked the first page). The
* walk address is set to the first page.
*/
}
return (WALK_NEXT);
}
int
{
if (pwd->pw_hashleft < 0) {
/* We're walking a vnode's pages */
/*
* If we don't have any pages to walk, if we have come
* back around to the first one (we're finished), or if we
* can't read the page we're looking at, we are done.
*/
return (WALK_DONE);
return (WALK_ERR);
}
/*
* Set the walk address to the next page, and if the
* first page hasn't been set yet (i.e. we are on the
* first page), set it.
*/
} else if (pwd->pw_hashleft > 0) {
/* We're walking all pages */
/*
* If pp (the walk address) is NULL, we scan through
* the page hash table until we find a page.
*/
/*
* Iterate through the page hash table until we
* find a page or reach the end.
*/
do {
mdb_warn("unable to read from %#p",
pwd->pw_hashloc);
return (WALK_ERR);
}
pwd->pw_hashleft--;
pwd->pw_hashloc++;
/*
* We've reached the end; exit.
*/
return (WALK_DONE);
}
return (WALK_ERR);
}
/*
* Set the walk address to the next page.
*/
} else {
/* We've finished walking all pages. */
return (WALK_DONE);
}
}
void
{
}
/*
* allpages walks all pages in the system in the order in which they
* appear in the memseg structure.
*/
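/*
* Minimal usage sketch (assumed; the walker name "allpages" is taken from
* the comment above, not from the elided registration code):
*
*     > ::walk allpages | ::page
*/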
int
{
mdb_warn("allpages only supports global walks.\n");
return (WALK_ERR);
}
mdb_warn("couldn't walk 'memseg'");
return (WALK_ERR);
}
return (WALK_NEXT);
}
int
{
while (pg_num > 0) {
return (WALK_ERR);
}
for (i = 0; i < pg_read; i++) {
return (ret);
}
}
return (WALK_NEXT);
}
void
{
}
/*
* Hash table + LRU queue.
* This table is used to cache recently read vnodes for the memstat
* command, to reduce the number of mdb_vread calls. This greatly
* speeds the memstat command on live, large CPU count systems.
*/
struct vn_htable_list {
};
/*
* vn_q_first -> points to the head of the queue: the vnode that was most
* recently used
* vn_q_last -> points to the oldest used vnode, and is freed once a new
* vnode is read.
* vn_htable -> hash table
* vn_htable_buf -> contains htable objects
* vn_htable_size -> total number of items in the hash table
* vn_htable_buckets -> number of buckets in the hash table
*/
typedef struct vn_htable {
int vn_htable_size;
int vn_htable_buckets;
} vn_htable_t;
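/*
* Layout sketch of the cache described above (anything not named in the
* typedef or comment above is an illustrative assumption):
*
*   vn_q_first -> [ MRU entry ] <-> ... <-> [ LRU entry ] <- vn_q_last
*   vn_htable[hash(vnode addr)] -> chain of vn_htable_list entries, each
*                                  caching one vnode's address and v_flag
*/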
/* allocate memory, initialize hash table and LRU queue */
static void
{
int i;
}
}
}
/*
* Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
* The function tries to find the needed information in the following order:
*
* 1. check if ptr is the first entry in the queue
* 2. check if ptr is in the hash table (if so, move it to the top of the queue)
* 3. do an mdb_vread, remove the last item from the queue and the hash table,
*    insert the new information into the freed object, and put that object at
*    the top of the queue.
*/
static int
{
int hkey;
/* 1. vnode ptr is the first in queue, just get v_flag and return */
return (0);
}
/* 2. search the hash table for this ptr */
/* 3. if hent is NULL, we did not find ptr in the hash table; do mdb_vread */
return (-1);
}
/* we will insert read data into the last element in queue */
/* remove the last object (hp->vn_q_last) from the hash table */
}
/* insert data into new free object */
/* insert new object into hash table */
}
/* Remove from queue. hent is not first, vn_q_prev is not NULL */
else
/* Add to the front of queue */
/* Set v_flag in vnode pointer from hent */
return (0);
}
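/*
* Move-to-front sketch for step 2 above, assuming doubly-linked queue
* fields named vn_q_prev/vn_q_next (illustrative only; the elided code
* above may use different names):
*
*     hent->vn_q_prev->vn_q_next = hent->vn_q_next;
*     if (hent->vn_q_next != NULL)
*             hent->vn_q_next->vn_q_prev = hent->vn_q_prev;
*     else
*             hp->vn_q_last = hent->vn_q_prev;
*     hent->vn_q_prev = NULL;
*     hent->vn_q_next = hp->vn_q_first;
*     hp->vn_q_first->vn_q_prev = hent;
*     hp->vn_q_first = hent;
*/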
/* Summary statistics of pages */
typedef struct memstat {
} memstat_t;
/*
* Summarize pages by type and update stat information
*/
/* ARGSUSED */
static int
{
if (PP_ISBOOTPAGES(pp))
stats->ms_bootpages++;
return (WALK_NEXT);
stats->ms_zfs_data++;
stats->ms_cachelist++;
return (WALK_ERR);
else if (IS_SWAPFSVP(vp))
else
return (WALK_NEXT);
}
/* ARGSUSED */
int
{
#endif
/*
* -s size is an internal option. It specifies the size of vn_htable.
* The hash table size is set in the following order:
* if the user has specified a size larger than VN_LARGE, try it, but
* fall back to VN_SMALL if the allocation fails. Otherwise try VN_LARGE,
* and fall back to VN_SMALL if that allocation fails.
* For better hash table efficiency, it is highly recommended to set the
* size to a prime number.
*/
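/*
* Hypothetical example of the internal option (the value is illustrative;
* 8191 is prime):
*
*     > ::memstat -s 8191
*/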
return (DCMD_USAGE);
/* Initialize vnode hash list and queue */
/* Total physical memory */
mdb_warn("unable to read total_pages");
return (DCMD_ERR);
}
/* Artificially limited memory */
mdb_warn("unable to read physmem");
return (DCMD_ERR);
}
/* read kernel vnode array pointer */
mdb_warn("unable to read kvps");
return (DCMD_ERR);
}
/*
* Read the zio vnode pointer.
*/
/*
* If physmem != total_pages, then the administrator has limited the
* number of pages available in the system. Excluded pages are
* associated with the unused pages vnode. Read this vnode so the
* pages can be excluded in the page accounting.
*/
mdb_warn("unable to read unused_pages_vp");
return (DCMD_ERR);
}
/* walk all pages, collect statistics */
&stats) == -1) {
mdb_warn("can't walk memseg");
return (DCMD_ERR);
}
((physmem) * 10)))
mdb_printf("Page Summary Pages MB"
" %%Tot\n");
mdb_printf("------------ ---------------- ----------------"
" ----\n");
mdb_printf("Kernel %16llu %16llu %3lu%%\n",
if (stats.ms_bootpages != 0) {
mdb_printf("Boot pages %16llu %16llu %3lu%%\n",
}
if (stats.ms_zfs_data != 0) {
mdb_printf("ZFS File Data %16llu %16llu %3lu%%\n",
}
mdb_printf("Anon %16llu %16llu %3lu%%\n",
mdb_printf("Exec and libs %16llu %16llu %3lu%%\n",
mdb_printf("Page cache %16llu %16llu %3lu%%\n",
mdb_printf("Free (cachelist) %16llu %16llu %3lu%%\n",
/*
* Occasionally, we double-count pages above. To avoid printing
* absurdly large values for freemem, we clamp it at zero.
*/
else
freemem = 0;
/* Are we running under Xen? If so, get balloon memory usage. */
else
freemem = 0;
}
#endif
if (bln_size != -1) {
mdb_printf("Balloon %16lu %16llu %3lu%%\n",
}
#endif
mdb_printf("\nTotal %16lu %16lu\n",
if (physmem != total_pages) {
mdb_printf("Physical %16lu %16lu\n",
}
return (DCMD_OK);
}
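/*
* Example output sketch, following the format strings above (all values
* are illustrative, assuming 4 KB pages; not from a real system):
*
*     > ::memstat
*     Page Summary                Pages                MB  %Tot
*     ------------     ----------------  ----------------  ----
*     Kernel                     123456               482    6%
*     Anon                       234567               916   11%
*     ...
*     Total                     2097152              8192
*/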
void
pagelookup_help(void)
{
"Finds the page with name { %<b>vp%</b>, %<b>offset%</b> }.\n"
"\n"
"Can be invoked three different ways:\n\n"
" ::pagelookup -v %<b>vp%</b> -o %<b>offset%</b>\n"
" %<b>vp%</b>::pagelookup -o %<b>offset%</b>\n"
" %<b>offset%</b>::pagelookup -v %<b>vp%</b>\n"
"\n"
"The latter two forms are useful in pipelines.\n");
}
int
{
int usedaddr = 0;
0) != argc) {
return (DCMD_USAGE);
}
"pagelookup: at least one of -v vp or -o offset "
"required.\n");
return (DCMD_USAGE);
}
usedaddr = 1;
offset = mdb_get_dot();
usedaddr = 1;
}
mdb_warn("pagelookup: address required\n");
return (DCMD_USAGE);
}
"pagelookup: address specified when both -v and -o were "
"passed");
return (DCMD_USAGE);
}
if (pageaddr == 0) {
mdb_warn("pagelookup: no page for {vp = %p, offset = %llp)\n",
return (DCMD_OK);
}
return (DCMD_OK);
}
/*ARGSUSED*/
int
{
return (DCMD_USAGE);
}
if (pp == 0) {
return (DCMD_ERR);
}
if (flags & DCMD_PIPE_OUT) {
} else {
}
return (DCMD_OK);
}
int
{
page_t p;
if (!(flags & DCMD_ADDRSPEC)) {
mdb_warn("can't walk pages");
return (DCMD_ERR);
}
return (DCMD_OK);
}
if (DCMD_HDRSPEC(flags)) {
mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
"PAGE", "VNODE", "OFFSET", "SELOCK",
"LCT", "COW", "IO", "FS", "ST");
}
return (DCMD_ERR);
}
mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
return (DCMD_OK);
}
int
{
void *ptr;
mdb_warn("swapinfo not found or invalid");
return (WALK_ERR);
}
return (WALK_NEXT);
}
int
{
return (WALK_DONE);
return (WALK_ERR);
}
}
int
{
char *name;
if (!(flags & DCMD_ADDRSPEC)) {
mdb_warn("can't walk swapinfo");
return (DCMD_ERR);
}
return (DCMD_OK);
}
if (DCMD_HDRSPEC(flags)) {
mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
"ADDR", "VNODE", "PAGES", "FREE", "NAME");
}
return (DCMD_ERR);
}
name = "*error*";
mdb_printf("%0?lx %?p %9d %9d %s\n",
return (DCMD_OK);
}
int
{
return (WALK_DONE);
return (WALK_ERR);
}
}
int
{
if (!(flags & DCMD_ADDRSPEC)) {
int i;
static const char *lists[] = {
"phys_install",
"phys_avail",
"virt_avail"
};
return (DCMD_USAGE);
if (!list)
list = 1;
if (!(list & 1))
continue;
return (DCMD_ERR);
}
ptr) == -1) {
mdb_warn("can't walk memlist");
return (DCMD_ERR);
}
}
return (DCMD_OK);
}
if (DCMD_HDRSPEC(flags))
return (DCMD_ERR);
}
return (DCMD_OK);
}
int
{
mdb_warn("seg walk must begin at struct as *\n");
return (WALK_ERR);
}
/*
* this is really just a wrapper around the AVL tree walker
*/
return (avl_walk_init(wsp));
}
/*ARGSUSED*/
int
{
struct seg s;
if (argc != 0)
return (DCMD_USAGE);
mdb_printf("%<u>%?s %?s %?s %?s %s%</u>\n",
"SEG", "BASE", "SIZE", "DATA", "OPS");
}
return (DCMD_ERR);
}
mdb_printf("%?p %?p %?lx %?p %a\n",
return (DCMD_OK);
}
/*ARGSUSED*/
static int
{
(*nres)++;
return (WALK_NEXT);
}
static int
{
/*
* Use the segvn_pages walker to find all of the in-core pages
* for this mapping.
*/
mdb_warn("failed to walk segvn_pages (s_data=%p)",
}
} else {
mdb_printf(" [ anon ]");
}
} else {
}
mdb_printf("\n");
return (WALK_NEXT);
}
static int
{
} else {
mdb_printf(" [ anon ]");
}
} else {
}
mdb_printf("\n");
return (WALK_NEXT);
}
/*ARGSUSED*/
int
{
if (!(flags & DCMD_ADDRSPEC))
return (DCMD_USAGE);
return (DCMD_USAGE);
return (DCMD_ERR);
}
else
if (quick) {
mdb_printf("VNODE\n");
} else {
}
return (DCMD_ERR);
}
return (DCMD_OK);
}
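/*
* The anon walkers below iterate over the slots of an anon_map's anon
* array. In the illumos VM layer that array is either one level (a flat
* array of anon pointers) or, for large maps, two levels: a top-level
* array whose entries each point to a chunk of ANON_CHUNK_SIZE slot
* pointers. The "level one"/"level two" index handling in the walk code
* below reflects that layout. (This note summarizes the surrounding code
* and vm/anon.h conventions, not the elided declarations.)
*/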
typedef struct anon_walk_data {
int
{
mdb_warn("anon walk doesn't support global walks\n");
return (WALK_ERR);
}
return (WALK_ERR);
}
return (WALK_ERR);
}
/* update min and maxslot with the given constraints */
} else {
aw->aw_nlevone =
aw->aw_levone_ndx = 0;
}
aw->aw_levtwo_ndx = 0;
out:
return (0);
}
int
{
/*
* Once we've walked through level one, we're done.
*/
return (WALK_DONE);
}
aw->aw_levone_ndx++;
} else {
if (aw->aw_levtwo_ndx == 0) {
/* The first time through, skip to our first index. */
if (aw->aw_levone_ndx == 0) {
aw->aw_levone_ndx =
aw->aw_levtwo_ndx =
}
aw->aw_levtwo_ndx = 0;
aw->aw_levone_ndx++;
return (WALK_NEXT);
}
ANON_CHUNK_SIZE * sizeof (uintptr_t));
-1) {
mdb_warn("unable to read anon_map %p's "
"second-level map %d at %p",
return (WALK_ERR);
}
}
/* update the indices for next time */
aw->aw_levtwo_ndx++;
aw->aw_levtwo_ndx = 0;
aw->aw_levone_ndx++;
}
/* make sure the slot # is in the requested range */
return (WALK_DONE);
}
}
}
}
return (WALK_NEXT);
}
void
{
}
int
{
}
int
{
mdb_warn("segvn_anon walk doesn't support global walks\n");
return (WALK_ERR);
}
mdb_warn("segvn_anon walk: unable to read segvn_data at %p",
svd_addr);
return (WALK_ERR);
}
mdb_warn("segvn_anon walk: segvn_data at %p has no anon map\n",
svd_addr);
return (WALK_ERR);
}
mdb_warn("segvn_anon walk: unable to read amp %p for "
return (WALK_ERR);
}
mdb_warn("segvn_anon walk: unable to read seg %p for "
return (WALK_ERR);
}
mdb_warn("anon map %p is too small for segment %p\n",
return (WALK_ERR);
}
return (anon_walk_init_common(wsp,
}
typedef struct {
typedef struct {
static int
{
/* See if the page is of interest */
return (WALK_NEXT);
}
/* See if we have space for the new entry, then add it. */
return (WALK_DONE);
}
svw->svw_sparse_count++;
return (WALK_NEXT);
}
static int
{
const segvn_sparse_t *const l = lp;
const segvn_sparse_t *const r = rp;
if (l->svs_offset < r->svs_offset) {
return (-1);
}
if (l->svs_offset > r->svs_offset) {
return (1);
}
return (0);
}
/*
* Builds on the "anon_all" walker to walk all resident pages in a segvn_data
* structure. For segvn_datas without an anon structure, it just looks up
* pages in the vnode. For segvn_datas with an anon structure, NULL slots
* pass through to the vnode, and non-NULL slots are checked for residency.
*/
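/*
* Hedged usage sketch (the starting address type is inferred from the
* warnings below, which read a segvn_data at the walk address):
*
*     > <segvn_data addr>::walk segvn_pages | ::page
*/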
int
{
mdb_warn("segvn walk doesn't support global walks\n");
return (WALK_ERR);
}
svw->svw_anonskip = 0;
svw->svw_sparse_idx = 0;
svw->svw_walkoff = 0;
-1) {
return (WALK_ERR);
}
mdb_warn("failed to read seg at %p (from %p)",
return (WALK_ERR);
}
/* make the walk terminate immediately; no pages */
/*
* If we don't have an anon pointer, and the segment is large,
* we try to load the in-memory pages into a fixed-size array,
* which is then sorted and reported directly. This is much
* faster than doing an mdb_page_lookup() for each possible
* offset.
*
* If the allocation fails, or there are too many pages
* in-core, we fall back to looking up the pages individually.
*/
sizeof (*svw->svw_sparse));
} else {
sizeof (*svw->svw_sparse),
}
}
"segvn_anon" : "segvn_anon_all";
/*
* If we're not printing all offsets, and the segvn_data has
* no backing VP, we can use the "segvn_anon" walker, which
* efficiently skips NULL slots.
*
* Otherwise, we layer over the "segvn_anon_all" walker
* (which reports all anon slots, even NULL ones), so that
* segvn_pages_walk_step() knows the precise offset for each
* element. It uses that offset information to look up the
* backing pages for NULL anon slots.
*/
mdb_warn("segvn_pages: failed to layer \"%s\" "
return (WALK_ERR);
}
}
return (WALK_NEXT);
}
int
{
/* If we've walked off the end of the segment, we're done. */
return (WALK_DONE);
}
/*
* If we've got a sparse page array, just send it directly.
*/
return (WALK_DONE);
}
} else {
} else {
svw->svw_sparse_idx++;
}
}
/*
* If there's no anon, or the anon slot is NULL, look up
* <vp, offset>.
*/
} else {
}
} else {
/*
* We have a "struct anon"; if it's not swapped out,
* look up the page.
*/
mdb_warn("walk segvn_pages: segvn_data %p "
"offset %ld, anon page <%p, %llx> not "
}
} else {
mdb_warn("walk segvn_pages: useless struct "
}
}
}
return (WALK_ERR);
}
}
}
return (WALK_NEXT);
}
void
{
sizeof (*svw->svw_sparse));
}
}
/*
* Grumble, grumble.
*/
int
{
long smd_hashmsk;
int hash;
if (!(flags & DCMD_ADDRSPEC))
return (DCMD_USAGE);
mdb_warn("failed to read smd_hashmsk");
return (DCMD_ERR);
}
mdb_warn("failed to read smd_hash");
return (DCMD_ERR);
}
mdb_warn("failed to read smd_hash");
return (DCMD_ERR);
}
mdb_warn("failed to read segkmap");
return (DCMD_ERR);
}
return (DCMD_ERR);
}
if (argc != 0) {
else
}
mdb_warn("couldn't read smap at %p",
return (DCMD_ERR);
}
do {
return (DCMD_ERR);
}
mdb_printf("vnode %p, offs %p is smap %p, vaddr %p\n",
return (DCMD_OK);
}
return (DCMD_OK);
}
/*ARGSUSED*/
int
{
if (!(flags & DCMD_ADDRSPEC))
return (DCMD_USAGE);
mdb_warn("failed to read segkmap");
return (DCMD_ERR);
}
return (DCMD_ERR);
}
return (DCMD_ERR);
}
return (DCMD_OK);
}