/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Postmortem type identification
* ------------------------------
*
* When debugging kernel memory corruption problems, one often determines that
* the corrupted buffer has been erroneously written to by a user of an
* adjacent buffer -- determining the specifics of the adjacent buffer can
* therefore offer insight into the cause of the corruption. To determine the
* type of an arbitrary memory buffer, however, one has historically been
* forced to use dcmds ::kgrep and ::whatis in alternating succession; when an
* object of known type is finally reached, types can be back-propagated to
* determine the type of the unknown object.
*
* This process is labor-intensive and error-prone. Using CTF data and a
* collection of heuristics, we would like to both automate this process and
* improve on it.
*
* We start by constructing the pointer graph. Each node in the graph is
* a memory object (either a static object from module data, or a dynamically
* allocated memory object); the node's outgoing edges represent pointers from
* the object to other memory objects in the system.
*
* Once the graph is constructed, we start at nodes of known type, and use the
* type information to determine the type of each pointer represented by an
* outgoing edge. Determining the pointer type allows us to determine the
* type of the edge's destination node, and therefore to iteratively continue
* the process of type identification. This process works as long as all
* pointed-to objects are exactly the size of their inferred types.
*
* Unfortunately, pointed-to objects are often _not_ the size of the pointed-to
* type. This is largely due to three phenomena:
*
* (a) C makes no distinction between a pointer to a single object and a
* pointer to some number of objects of like type.
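 *
 *     As a trivial illustration, nothing in the declaration
 *
 *	foo_t *fp;
 *
 *     distinguishes a pointer to a single foo_t from a pointer to the
 *     first of many contiguous foo_t's.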
*
* (b) C performs no bounds checking on array indexing, allowing declarations
* of structures that are implicitly followed by arrays of the type of the
* structure's last member. These declarations most often look like:
*
 *
 *	typedef struct foo {
 *		int foo_bar;
 *		int foo_baz;
 *		mumble_t foo_mumble[1];
 *	} foo_t;
*
* When a foo_t is allocated, the size of n - 1 mumble_t's is added to the
* size of a foo_t to derive the size of the allocation; this allows for
* the n trailing mumble_t's to be referenced from the allocated foo_t
* using C's convenient array syntax -- without requiring an additional
* memory dereference. ISO C99 calls the last member in such a structure
* the "flexible array member" (FAM); we adhere to this terminology.
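 *
 *     A typical (purely illustrative) allocation of such a structure is:
 *
 *	fp = kmem_alloc(sizeof (foo_t) +
 *	    (n - 1) * sizeof (mumble_t), KM_SLEEP);
 *
 *     after which fp->foo_mumble[0] through fp->foo_mumble[n - 1] may all
 *     be legitimately referenced.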
*
* (c) It is not uncommon for structures to embed smaller structures, and
* to pass pointers to these smaller structures to routines that track
* the structures only by the smaller type. This can be thought of as
* a sort of crude-but-efficient polymorphism; see e.g., struct seg and
* its embedded avl_node_t. It is less common (but by no means unheard
* of) for the smaller structures to be used as place holders in data
* structures consisting of the larger structure. That is, instead of an
* instance of the larger structure being pointed to by the smaller
 * structure pointer, an instance of the smaller structure is pointed to by
* the larger structure pointer; see e.g., struct buf and struct hbuf or
* struct seg_pcache and struct seg_phash. This construct is particularly
* important to identify when the smaller structures are in a contiguous
* array (as they are in each of the two examples provided): by examining
* only the data structure of larger structures, one would erroneously
* assume that the array of the smaller structure is actually an array of
* the larger structure.
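 *
 *     As a purely illustrative sketch of the placeholder case (the names
 *     here are hypothetical):
 *
 *	typedef struct small {
 *		struct big *sm_next;
 *	} small_t;
 *
 *	small_t smalltab[NSMALL];
 *
 *     Here smalltab[] is an array of list heads; on a circular list, the
 *     linkage pointers within struct big -- declared as struct big * --
 *     end up pointing back at smalltab[] elements, so propagating from
 *     those pointers alone would erroneously identify smalltab[] as an
 *     array of struct big.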
*
* Taken together, these three phenomena imply that if we have a pointer to
* an object that is larger than the pointed-to type, we don't know if the
* object is an array of objects of the pointed-to type, the pointed-to type
* followed by an array of that type's last member, or some other larger type
* that we haven't yet discovered.
*
* Differentiating these three situations is the focus of many of the
* type graph heuristics. Type graph processing is performed in an initial
* pass, four type-determining passes, and a final, post-pass:
*
* Initial: Graph construction
*
* The initial pass constructs the nodes from the kmem caches and module data,
* and constructs the edges by propagating out from module data. Nodes that
* are in module data or known kmem caches (see tg_cachetab[], below) are
* marked with their known type. This pass takes the longest amount of
* wall-clock time, for it frequently induces I/O to read the postmortem image
* into memory from permanent storage.
*
* pass1: Conservative propagation
*
* In pass1, we propagate types out from the known nodes, adding types to
* nodes' tgn_typelists as they are inferred. Nodes are marked as they are
* processed to guarantee halting. We proceed as conservatively as possible
* in this pass; if we discover that a node is larger than twice its inferred
* type (that is, we've run into one of the three phenomena described above),
* we add the inferred type to the node's tgn_typelist, but we don't descend.
*
* pass2: Array determination
*
* In pass2, we visit those nodes through which we refused to descend in pass1.
* If we find one (and only one) structural interpretation for the object, we
* have determined that -- to the best of our knowledge -- we are not seeing
* phenomenon (c). To further differentiate (a) from (b), we check if the
* structure ends with an array of size one; if it does, we assume that it has
* a flexible array member. Otherwise, we perform an additional check: we
* calculate the size of the object modulo the size of the inferred type and
* subtract it from the size of the object. If this value is less than or
* equal to the size of the next-smaller kmem cache, we know that it's not an
* array of the inferred type -- if it were an array of the inferred type, it
* would have been instead allocated out of the next-smaller cache.
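 *
 * As a purely illustrative example (using actual kmem_alloc cache sizes):
 * suppose a 192-byte buffer has an inferred type of size 40.  192 modulo
 * 40 is 32, and 192 minus 32 is 160.  A true array of four 40-byte
 * elements would occupy only 160 bytes and would therefore have been
 * satisfied by the 160-byte kmem_alloc cache -- so the 192-byte buffer is
 * not an array of the 40-byte type.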
*
* In either case (FAM or no FAM), we iterate through each element of the
* hypothesised array, checking that each pointer member points to either NULL
* or valid memory. If pointer members do not satisfy these criteria, it is
* assumed that we have not satisfactorily determined that the given object is
* an array of the inferred type, and we abort processing of the node. Note
* that uninitialized pointers can potentially prevent an otherwise valid
* array from being interpreted as such. Because array misinterpretation
 * can induce substantial cascading type misinterpretation, we prefer to be
 * conservative and accurate in such cases -- even if it means a lower type
* recognition rate.
*
* pass3: Type coalescence
*
* pass3 coalesces type possibilities by preferring structural possibilities
* over non-structural ones. For example, if an object is either of type
* "char" (pointed to by a caddr_t) or type "struct frotz", the possibilities
* will be coalesced into just "struct frotz."
*
* pass4: Non-array type inference
*
* pass4 is the least conservative: it is assumed that phenomenon (c) has been
* completely ferreted out by prior passes. All unknown types are visited, and
* incoming edges are checked. If there is only one possible structural
* inference for the unknown type, the node is inferred to be of that type, and
* the type is propagated. This pass picks up those nodes that are larger than
* their inferred type, but for which the inferred type is likely accurate.
* (struct dcentry, with its FAM of characters, is an example type that is
* frequently determined by this pass.)
*
* Post-pass: Greatest unknown reach
*
* If recognition rate is low (or, from a more practical perspective, if the
* object of interest is not automatically identified), it can be useful
* to know which node is the greatest impediment to further recognition.
* If the user can -- by hook or by crook -- determine the true type of this
* node (and set it with ::istype), much more type identification should be
* possible. To facilitate this, we therefore define the _reach_ of a node to
* be the number of unknown nodes that could potentially be identified were the
* node's type better known. We determine the reach by performing a
* depth-first pass through the graph. The node of greatest reach (along with
 * the reach itself) is reported upon completion of the post-pass.
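 *
 * For example, if node A (of unknown type) has edges to unknown nodes B and
 * C, and B in turn has an edge to unknown node D, then correctly typing A
 * could allow B, C and D to be typed as well; all three therefore
 * contribute to A's reach.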
*/
#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#include <sys/sysmacros.h>
#include <sys/kmem_impl.h>
#include <sys/vmem_impl.h>
#include <mdb/mdb_ctf.h>
#include "kmem.h"
struct tg_node;
typedef struct tg_edge {
int tge_marked; /* mark */
} tg_edge_t;
typedef struct tg_type {
const char *tgt_rmember; /* referring member */
int tgt_flags; /* flags */
} tg_type_t;
#define TG_TYPE_ARRAY 0x0001
#define TG_TYPE_NOTARRAY 0x0002
#define TG_TYPE_HASFAM 0x0004
typedef struct tg_node {
char tgn_marked; /* marked */
char tgn_postmarked; /* marked in postpass */
int tgn_smaller; /* size of next-smaller cache */
int tgn_reach; /* number of reachable unknown nodes */
} tg_node_t;
typedef struct tg_stats {
} tg_stats_t;
typedef struct tg_typeoffs {
const char **tgto_memberp; /* referring member name */
typedef struct tg_buildstate {
typedef struct tg_poststate {
typedef struct tg_todo {
} tg_todo_t;
typedef struct tg_nodedata {
/*
* Some caches can be pretty arduous to identify (or are rife with conflicts).
* To assist type identification, specific caches are identified with the
 * types of their contents. Not every cache need be listed here; in general,
 * a cache should only be added to the tg_cachetab[] if the identification rate
 * for the cache is less than 95%. (The identification rate for a
* specific cache can be quickly determined by specifying the cache to
* ::typegraph.)
*/
struct {
char *tgc_name;
char *tgc_type;
} tg_cachetab[] = {
{ "streams_mblk", "mblk_t" },
{ "seg_cache", "struct seg" },
{ "segvn_cache", "struct segvn_data" },
{ "anon_cache", "struct anon" },
{ "ufs_inode_cache", "inode_t" },
{ "hme_cache", "struct hment" },
{ "queue_cache", "queinfo_t" },
{ "sock_cache", "struct sonode" },
{ "ire_cache", "ire_t" },
};
/*
* Some types are only known by their opaque handles. While this is a good way
* to keep interface clients from eating the Forbidden Fruit, it can make type
* identification difficult -- which can be especially important for big
* structures like dev_info_t. To assist type identification, we keep a table
* to translate from opaque handles to their underlying structures. A
* translation should only be added to the tg_typetab[] if the lack of
* translation is preventing substantial type identification. (This can be
* determined by using the "typeunknown" walker on a dump with bufctl auditing
* enabled, and using "::whatis -b" to determine the types of unknown buffers;
* if many of these unknown types are structures behind an opaque handle, a
* new entry in tg_typetab[] is likely warranted.)
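 *
 * A typical (illustrative) way to gather this information from within mdb:
 *
 *	> ::walk typeunknown | ::whatis -b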
*/
struct {
char *tgt_type_name; /* filled in statically */
char *tgt_actual_name; /* filled in statically */
} tg_typetab[] = {
{ "dev_info_t", "struct dev_info" },
{ "ddi_dma_handle_t", "ddi_dma_impl_t *" },
};
static enum {
TG_PASS1 = 1,
} tg_pass;
static int *tg_sizes; /* copy of kmem_alloc_sizes[] array */
static int tg_nsizes; /* number of sizes in tg_sizes */
static int tg_improved; /* flag indicating that we have improved */
static int tg_built; /* flag indicating that type graph is built */
tg_edge_t *, const char **);
static void
typegraph_typetab_init(void)
{
int i;
mdb_warn("can't find type '%s'\n",
tg_typetab[i].tgt_type_name);
continue;
}
mdb_warn("can't find type '%s'\n",
}
}
}
/*
* A wrapper around mdb_ctf_type_resolve() that first checks the type
* translation table.
*/
static mdb_ctf_id_t
{
int i;
/*
* This could be _much_ more efficient...
*/
break;
}
}
return (ret);
}
/*
* A wrapper around mdb_ctf_type_name() that deals with anonymous structures.
* Anonymous structures are those that have no name associated with them.
* Nearly always, these structures are referred to by a typedef (e.g.
* "typedef struct { int bar } foo_t"); we expect the unresolved type to
* be passed as utype.
*/
static char *
{
static char buf[MDB_SYM_NAMLEN];
} else {
/*
* Perhaps a CTF interface would be preferable to this kludgey
* strcmp()? Perhaps.
*/
}
return (buf);
}
/*
* A wrapper around mdb_ctf_type_size() that accurately accounts for arrays.
*/
static ssize_t
{
if (!mdb_ctf_type_valid(type))
return (-1);
return (mdb_ctf_type_size(type));
return (-1);
if (!mdb_ctf_type_valid(type))
return (-1);
return (-1);
}
/*
* The mdb_ctf_member_iter() callback for typegraph_type_offset().
*/
static int
{
int kind;
/*
* We went past it; return failure.
*/
return (1);
}
return (0);
return (0);
/*
* Haven't reached it yet; continue looking.
*/
return (0);
}
/*
* If the base type is not a structure, an array or a union, and
* the offset equals the desired offset, we have our type.
*/
return (1);
}
/*
* If the type is an array, see if we fall within the bounds.
*/
if (kind == CTF_K_ARRAY) {
return (0);
if (!mdb_ctf_type_valid(type))
return (0);
/*
* Nope, haven't found it yet; continue looking.
*/
return (0);
}
}
return (1);
}
/*
* The mdb_ctf_member_iter() callback for typegraph_type_offset() when the type
* is found to be of kind CTF_K_UNION. With unions, we attempt to do better
* than just completely punting: if all but one of the members is impossible
* (due to, say, size constraints on the destination node), we can propagate
* the valid member.
*/
static int
{
int kind;
return (0);
kind != CTF_K_ARRAY) {
}
if (!mdb_ctf_type_valid(type))
return (0);
return (0);
/*
* Now figure out what exactly we're pointing to.
*/
return (0);
return (0);
/*
* Compare this size to the size of the thing we're pointing to --
* if it's larger than the node that we're pointing to, we know that
* the alleged pointer type must be an invalid interpretation of the
* union.
*/
/*
* We're in luck -- it's not possibly this pointer.
*/
return (0);
}
/*
* This looks like it could be legit. If the type hasn't been
* specified, we could be in business.
*/
/*
* There are two potentially valid interpretations for this
* union. Invalidate the type.
*/
return (1);
}
return (0);
}
/*ARGSUSED*/
static int
typegraph_lastmember(const char *name,
{
return (0);
}
/*
 * To determine if a structure has a flexible array member, we iterate over
* the members; if the structure has more than one member, and the last member
* is an array of size 1, we're going to assume that this structure has a
* flexible array member. Yes, this heuristic is a little sloppy -- but cut me
* some slack: why the hell else would you have an array of size 1? (Don't
* answer that.)
*/
static int
{
int kind;
if (!mdb_ctf_type_valid(type))
return (0);
return (0);
if (!mdb_ctf_type_valid(last))
return (0);
if (kind != CTF_K_ARRAY)
return (0);
/*
* This structure has only one member; even if that member is
* an array of size 1, we'll assume that there is something
* stranger going on than a run-of-the-mill FAM (e.g., a
* kmutex_t).
*/
return (0);
}
return (0);
return (0);
return (1);
}
/*
* This routine takes a type and offset, and returns the type at the specified
* offset. It additionally takes an optional edge to help bust unions, and
* an optional address of a character pointer to set to the name of the member
* found at the specified offset.
*/
static mdb_ctf_id_t
const char **member)
{
/*
* Resolve type to its base type.
*/
switch (kind) {
case CTF_K_ARRAY:
/*
* If this is an array, we need to figure out what it's an
* array _of_. We must then figure out the size of the array
* structure, and then determine our offset within that type.
* From there, we can recurse.
*/
return (inval);
if (!mdb_ctf_type_valid(type))
return (inval);
/*
 * Check that the offset doesn't point into the middle of the base
 * type, and return it.
*/
/*
* The offset is pointing to the middle of a
* type; return failure.
*/
return (inval);
}
return (type);
}
case CTF_K_STRUCT:
/*
* If the offset is larger than the size, we need to figure
* out what exactly we're looking at. There are several
* possibilities:
*
* (a) A structure that has this type as its first member.
*
* (b) An array of structures of this type.
*
 * (c) A structure of this type that has a flexible array member.
*
* The differentiation between (a) and (b) has hopefully
* happened before entering this function. To differentiate
* between (b) and (c), we call typegraph_hasfam().
*/
/*
* We have a live one. Take the size, subtract
* the size of the last element, and recurse.
*/
if (!mdb_ctf_type_valid(last))
return (inval);
return (typegraph_type_offset(last,
}
}
case CTF_K_POINTER:
return (inval);
/*
* The offset is pointing to the middle of a type;
* return failure.
*/
return (inval);
}
return (type);
case CTF_K_UNION:
if (e == NULL) {
/*
* We've been given no outbound edge -- we have no way
* of figuring out what the hell this union is.
*/
return (inval);
}
/*
* Try to bust the union...
*/
if (mdb_ctf_member_iter(type,
/*
* There was at least one valid pointer in there.
* Return "void *".
*/
return (type);
}
default:
/*
* If the offset is anything other than zero, no dice.
*/
if (offset != 0)
return (inval);
return (type);
}
}
/*
* This routine takes an address and a type, and determines if the memory
* pointed to by the specified address could be of the specified type.
* This could become significantly more sophisticated, but for now it's pretty
* simple: this is _not_ of the specified type if it's a pointer, and either:
*
* (a) The alignment is not correct given the type that is pointed to.
*
* (b) The memory pointed to is invalid. Note that structures that have
* uninitialized pointers may cause us to erroneously fail -- but these
* structures are a bug anyway (uninitialized pointers can confuse many
* analysis tools, including ::findleaks).
*/
static int
{
int rkind;
char buf[MDB_SYM_NAMLEN];
return (1);
return (1);
return (1);
/*
* This is definitely unexpected. We should not be getting
* back an error on a node that was successfully read in.
* Lacking something better to do, we'll print an error
* and return.
*/
return (1);
}
/*
* If it's a pointer to a structure or union, it must be
* aligned to sizeof (uintptr_t).
*/
if (tg_verbose) {
mdb_printf("typegraph: pass %d: rejecting "
"*%p (%p) as %s: misaligned pointer\n",
}
return (0);
}
}
return (1);
/*
* For our speculative read, we're going to clamp the referenced size
* at the size of a pointer.
*/
if (tg_verbose) {
mdb_printf("typegraph: pass %d: rejecting *%p (%p) as"
}
return (0);
}
return (1);
}
static int
typegraph_nodecmp(const void *l, const void *r)
{
return (-1);
return (1);
return (0);
}
static tg_node_t *
{
continue;
}
continue;
}
}
return (NULL);
}
static int
typegraph_interested(const kmem_cache_t *c)
{
mdb_warn("cannot read arena %p for cache '%s'",
return (0);
}
/*
* If this cache isn't allocating from the kmem_default or the
* kmem_firewall vmem arena, we're not interested.
*/
return (0);
return (1);
}
static int
{
if (!typegraph_interested(c))
return (WALK_NEXT);
return (WALK_NEXT);
}
static int
{
return (WALK_NEXT);
return (WALK_NEXT);
}
return (WALK_NEXT);
}
/*ARGSUSED*/
static int
{
return (WALK_NEXT);
return (WALK_NEXT);
}
/*ARGSUSED*/
static int
{
return (WALK_NEXT);
}
/*ARGSUSED*/
static int
{
int i, smaller;
if (!typegraph_interested(c))
return (WALK_NEXT);
c->cache_name);
return (WALK_DONE);
}
continue;
&type) == -1) {
mdb_warn("could not find type '%s', allegedly type "
c->cache_name);
break;
}
break;
}
/*
* If this is a named cache (i.e., not from a kmem_alloc_[n] cache),
* the nextsize is 0.
*/
if (mdb_lookup_by_name("kmem_alloc_sizes",
&sym) == -1) {
mdb_warn("failed to find 'kmem_alloc_sizes'");
return (WALK_ERR);
}
mdb_warn("failed to read kmem_alloc_sizes");
return (WALK_ERR);
}
}
/*
* Yes, this is a linear search -- but we're talking about
* a pretty small array (38 elements as of this writing), and
* only executed a handful of times (for each sized kmem
* cache).
*/
for (i = 0; i < tg_nsizes; i++) {
if (tg_sizes[i] == c->cache_bufsize)
break;
}
if (i == tg_nsizes) {
/*
* Something is wrong -- this appears to be a sized
* kmem cache, but we can't find its size in the
* kmem_alloc_sizes array. Emit a warning and return
* failure.
*/
mdb_warn("couldn't find buffer size for %s (%d)"
" in kmem_alloc_sizes array\n", c->cache_name,
c->cache_bufsize);
return (WALK_ERR);
}
if (i == 0) {
smaller = 1;
} else {
}
} else {
smaller = 0;
}
}
return (WALK_NEXT);
}
/*ARGSUSED*/
static int
{
return (WALK_NEXT);
}
static int
{
return (WALK_NEXT);
if (mdb_pwalk("vmem_alloc",
return (WALK_NEXT);
}
static void
{
return;
/*
* Add an anchored node.
*/
push:
/*
* If our address isn't pointer-aligned, we need to align it and
* whack the size appropriately.
*/
goto out;
}
ndx = 0;
mdb_warn("couldn't read ptr at %p (size %ld); rval is %d",
goto out;
}
pop:
continue;
continue;
/*
* We need to record an edge to us.
*/
/*
* If this node is marked, we don't need to descend.
*/
if (node->tgn_marked)
continue;
/*
* We need to descend. To minimize the resource consumption
* of type graph construction, we avoid recursing.
*/
} else {
}
goto push;
}
/*
* If we're here, then we have completed this region. We need to
* free our buffer, and update our "resident" counter accordingly.
*/
out:
/*
* If we have pushed state, we need to pop it.
*/
goto pop;
}
}
}
static void
{
char name[MDB_SYM_NAMLEN];
do {
addr++;
continue;
}
addr++;
continue;
}
/*
* Yes, this is a kludge. "kstat_initial" ends up
* backing the kstat vmem arena -- so we don't want
* to include it as an anchor node.
*/
continue;
}
/*
* We have the symbol; now get its type.
*/
continue;
}
if (!mdb_ctf_type_valid(type)) {
continue;
}
continue;
}
}
/*ARGSUSED*/
static int
{
/*
* If this thread isn't already a node, add it as an anchor. And
* regardless, set its type to be the specified type.
*/
} else {
}
return (WALK_NEXT);
}
/*ARGSUSED*/
static int
{
return (WALK_NEXT);
}
static void
{
if (edge->tge_destoffs == 0) {
} else {
}
/*
* First, search for this type in the type list.
*/
return;
}
tg_improved = 1;
}
static void
{
tg_edge_t *e;
if (!node->tgn_marked)
stats->tgs_unmarked++;
return;
}
stats->tgs_conflicts++;
return;
}
return;
}
/*
* This node is not typed -- but check if any of its outgoing edges
* were successfully typed; such nodes represent candidates for
* an exhaustive type search.
*/
if (e->tge_dest->tgn_typelist) {
stats->tgs_candidates++;
break;
}
}
}
/*ARGSUSED*/
static int
{
stats->tgs_buffers++;
return (WALK_NEXT);
}
return (WALK_NEXT);
}
/*ARGSUSED*/
void
{
mdb_printf("typegraph: ");
mdb_printf("\n");
return;
}
}
static void
{
}
static void
{
}
static void
typegraph_stat_time(int last)
{
if (ts == 0) {
} else {
}
mdb_printf("typegraph: %30s => %lld seconds\n",
mdb_printf("typegraph: %30s => %lld seconds\n",
mdb_printf("typegraph:\n");
if (last)
ts = 0;
}
static void
typegraph_stats(void)
{
size_t i, n;
for (i = 0; i < tg_nnodes - tg_nanchored; i++)
typegraph_stat_print("nodes", n);
typegraph_stat_perc("known or conjectured",
}
/*
* This is called both in pass1 and in subsequent passes (to propagate new type
* inferences).
*/
static void
{
tg_edge_t *e;
const char *member;
if (!mdb_ctf_type_valid(type))
return;
/*
* For each of the nodes corresponding to our outgoing edges,
* determine their type.
*/
int kind;
/*
* If we're being called in pass1, we're very conservative:
*
* (a) If the outgoing edge is beyond the size of the type
* (and the current node is not the root), we refuse to
* descend. This situation isn't actually hopeless -- we
* could be looking at array of the projected type -- but
* we'll allow a later phase to pass in that node and its
* conjectured type as the root.
*
* (b) If the outgoing edge has a destination offset of
* something other than 0, we'll descend, but we won't
* add the type to the type list of the destination node.
* This allows us to gather information that can later be
* used to perform a more constrained search.
*/
continue;
continue;
if (e->tge_srcoffs < offs)
continue;
if (e->tge_marked)
continue;
if (!mdb_ctf_type_valid(ntype))
continue;
continue;
continue;
continue;
continue;
if (e->tge_dest->tgn_marked)
continue;
/*
* If our destination offset is 0 and the type that we marked
* it with is useful, mark the node that we're
* going to visit. And regardless, mark the edge.
*/
e->tge_marked = 1;
/*
* If this isn't a structure, it's pointless to descend.
*/
if (kind != CTF_K_STRUCT)
continue;
/*
* If the conjectured type is less than half of the
* size of the object, we might be dealing with a
* polymorphic type. It's dangerous to descend in
* this case -- if our conjectured type is larger than
* the actual type, we will mispropagate. (See the
* description for phenomenon (c) in the block comment
* for how this condition can arise.) We therefore
* only descend if we are in pass4 and there is only
* one inference for this node.
*/
continue;
/*
* There is either no inference for this node,
* or more than one -- in either case, chicken
* out.
*/
continue;
}
}
} else {
}
} else {
}
}
/*
* If this was from a to-do list, it needs to be freed.
*/
}
/*
* Now peel something off of the to-do list.
*/
goto again;
}
/*
* Nothing more to do -- free the to-do list.
*/
}
}
static void
typegraph_pass1(void)
{
int i;
for (i = 0; i < tg_nnodes; i++)
}
static void
{
return;
/*
* Fucking unions...
*/
break;
}
/*
* There's more than one interpretation for
* this structure; for now, we punt.
*/
break;
}
}
}
return;
if (fam) {
/*
* If this structure has a flexible array member, and the
* FAM type isn't a struct or pointer, we're going to treat
* it as if it did not have a FAM.
*/
fam = 0;
}
if (!fam) {
/*
* If this doesn't have a flexible array member, and our
* preferred type is greater than half the size of the node, we
* won't try to treat it as an array.
*/
return;
/*
* If the next-smaller cache size is zero, we were
* expecting the type size to evenly divide the node
* size -- we must not have the right type.
*/
if (node->tgn_smaller == 0)
return;
/*
* If this were really an array of this type,
* we would have allocated it out of a smaller
* cache -- it's either not an array (e.g.,
* it's a bigger, unknown structure) or it's an
* array of some other type. In either case,
* we punt.
*/
return;
}
}
}
/*
* So far, this looks like it might be an array.
*/
if (node->tgn_smaller != 0) {
} else {
}
if (fam) {
if (!mdb_ctf_type_valid(ntype)) {
offs++;
continue;
}
return;
}
}
} else {
if (!mdb_ctf_type_valid(ntype)) {
offs++;
continue;
}
continue;
return;
}
continue;
}
}
/*
* Now mark this type as an array, and reattempt pass1 from this node.
*/
}
static void
typegraph_pass2(void)
{
int i;
do {
tg_improved = 0;
for (i = 0; i < tg_nnodes; i++)
typegraph_pass2_node(&tg_node[i]);
} while (tg_improved);
}
static void
typegraph_pass3(void)
{
size_t i;
/*
* In this pass, we're going to coalesce types. We're looking for
* nodes where one possible type is a structure, and another is
* either a CTF_K_INTEGER variant (e.g. "char", "void") or a
* CTF_K_FORWARD (an opaque forward definition).
*
* N.B. It might appear to be beneficial to coalesce types when
* the possibilities include two structures, and one is contained
* within the other (e.g., "door_node" contains a "vnode" as its
* first member; "vnode" could be smooshed, leaving just "door_node").
* This optimization is overly aggressive, however: there are too
* many places where we pull stunts with structures such that they're
* actually polymorphic (e.g., "nc_cache" and "ncache"). Performing
* this optimization would run the risk of false propagation --
* which we want to avoid if at all possible.
*/
for (i = 0; i < tg_nnodes; i++) {
continue;
continue;
/*
* First, scan for type CTF_K_STRUCT. If we find it, eliminate
* everything that's a CTF_K_INTEGER or CTF_K_FORWARD.
*/
break;
}
if (kind == CTF_K_INTEGER ||
kind == CTF_K_FORWARD) {
} else {
}
} else {
}
}
}
}
goto again;
}
}
static void
{
tg_edge_t *e;
int kind;
return;
return;
/*
* Now we want to iterate over all incoming edges. If we can find an
* incoming edge pointing to offset 0 from a node of known or
* conjectured type, check the types of the referring node.
*/
if (e->tge_destoffs != 0)
continue;
if (n->tgn_typelist != NULL &&
}
if (!mdb_ctf_type_valid(ntype))
continue;
}
continue;
/*
* We have two valid, potentially conflicting
* interpretations for this node -- chicken out.
*/
break;
}
src = n;
}
}
static void
typegraph_pass4(void)
{
do {
conjectured[gen] = 0;
for (i = 0; i < tg_nnodes; i++) {
conjectured[gen]++;
typegraph_pass4_node(&tg_node[i]);
}
/*
* Perform another pass3 to coalesce any new conflicts.
*/
gen ^= 1;
}
static void
{
if (node->tgn_postmarked)
return;
push:
pop:
if (dest->tgn_postmarked)
continue;
/*
* Add a new element and descend.
*/
} else {
}
goto push;
}
/*
* We are an unknown node; our count must reflect this.
*/
}
/*
* Now we need to check for state to pop.
*/
/*
* We only sum our child's reach into our own if
* that child is of unknown type. This prevents long
* chains of non-increasing reach.
*/
}
goto pop;
}
/*
* We need to free our freelist.
*/
}
}
static void
typegraph_postpass(void)
{
int i, max = 0;
char c[256];
for (i = 0; i < tg_nnodes; i++)
tg_node[i].tgn_postmarked = 0;
/*
* From those nodes with unknown type and no outgoing edges, we want
 * to emanate towards the root.
*/
}
for (i = 0; i < tg_nnodes - tg_nanchored; i++) {
continue;
continue;
}
mdb_snprintf(c, sizeof (c), "%p",
} else {
strcpy(c, "-");
}
typegraph_stat_str("greatest unknown node reach", c);
typegraph_stat_perc("reachable unknown nodes",
}
static void
typegraph_allpass(int first)
{
size_t i;
tg_edge_t *e;
if (!first)
for (i = 0; i < tg_nnodes; i++) {
tg_node[i].tgn_marked = 0;
tg_node[i].tgn_postmarked = 0;
e->tge_marked = 0;
}
}
/*ARGSUSED*/
static int
{
return (WALK_NEXT);
return (WALK_NEXT);
}
/*
* As long as we're here, we're going to mark the address pointed
* to by mod_mp as a "struct module" (mod_mp is defined to be a
* void *). Yes, this is a horrible kludge -- but it's not like
* this code isn't already depending on the fact that mod_mp is
* actually a pointer to "struct module" (see the mdb_vread(), above).
* Without this, we can't identify any of the objects allocated by
* krtld.
*/
}
return (WALK_NEXT);
}
static void
typegraph_sort(void)
{
size_t i;
if (tg_sorted)
for (i = 0; i < tg_nsorted; i++)
}
static void
{
mdb_warn("couldn't find node corresponding to "
return;
}
return;
}
}
/*
* There are a few important nodes that are impossible to figure out without
* some carnal knowledge.
*/
static void
typegraph_known_nodes(void)
{
mdb_warn("couldn't read 'segkp'");
} else {
} else {
"struct segkp_segdata");
}
}
}
/*ARGSUSED*/
int
{
kmem_cache_t c;
int i;
if (!mdb_prop_postmortem) {
mdb_warn("typegraph: can only be run on a system "
"dump; see dumpadm(1M)\n");
return (DCMD_ERR);
}
tg_verbose = 0;
return (DCMD_USAGE);
if (tg_built)
goto trace;
/*
* First, we need an estimate on the number of buffers.
*/
if (mdb_walk("kmem_cache",
mdb_warn("couldn't walk 'kmem_cache'");
return (DCMD_ERR);
}
if (mdb_walk("modctl",
mdb_warn("couldn't walk 'modctl'");
return (DCMD_ERR);
}
if (mdb_walk("vmem",
mdb_warn("couldn't walk 'vmem'");
return (DCMD_ERR);
}
for (i = 0; i < est; i++)
mdb_warn("couldn't walk 'vmem'");
return (DCMD_ERR);
}
mdb_warn("couldn't walk 'kmem_cache'");
return (DCMD_ERR);
}
mdb_warn("couldn't find 'kthread_t'");
return (DCMD_ERR);
}
mdb_warn("couldn't walk 'thread'");
return (DCMD_ERR);
}
mdb_warn("couldn't find 'ekstat_t'");
return (DCMD_ERR);
}
mdb_warn("couldn't read 'kstat_arena'");
return (DCMD_ERR);
}
mdb_warn("couldn't walk kstat vmem arena");
return (DCMD_ERR);
}
mdb_warn("couldn't walk 'modctl'");
return (DCMD_ERR);
}
tg_built = 1;
return (DCMD_OK);
}
/*
* If we've been given an address, it's a kmem cache.
*/
return (DCMD_ERR);
}
if (mdb_pwalk("kmem",
return (DCMD_ERR);
}
if (DCMD_HDRSPEC(flags))
"BUFS", "NODES", "UNMRK", "KNOWN",
"INFER", "FRAG", "HIT%");
} else {
perc = 0;
}
mdb_printf("%-25s %7ld %7ld %7ld %7ld %7ld %7ld %3d.%1d\n",
return (DCMD_OK);
}
int
typegraph_built(void)
{
if (!tg_built) {
mdb_warn("type graph not yet built; run ::typegraph.\n");
return (0);
}
return (1);
}
int
{
tg_edge_t *e;
char buf[MDB_SYM_NAMLEN];
int verbose = 0;
return (DCMD_USAGE);
if (!(flags & DCMD_ADDRSPEC))
return (DCMD_USAGE);
if (!typegraph_built())
return (DCMD_ABORT);
return (DCMD_OK);
}
if (!verbose) {
return (DCMD_OK);
}
mdb_printf("unknown type\n");
return (DCMD_OK);
}
}
mdb_printf("possibly %s%s ",
mdb_printf("(%s.%s) ",
}
if (offs != 0)
mdb_printf("\n");
return (DCMD_OK);
}
mdb_printf("possibly one of the following:\n");
mdb_printf(" %s%s ",
if (offs != 0)
mdb_printf("(from %p+%p, type %s)\n",
}
mdb_printf("\n");
return (DCMD_OK);
}
"SIZE", "REACH", "MRK");
mdb_printf("%-?p %-?p %-29s %5d %5d %s\n",
mdb_printf("\n");
mdb_printf(" %-20s %?s %8s %-20s %s\n",
"INFERENCE", "FROM", "SRCOFFS", "REFTYPE", "REFMEMBER");
mdb_printf(" %-20s %?p %8p %-20s %s\n",
}
mdb_printf("\n");
mdb_printf(" %-20s %?s %8s %-20s %s\n",
"FRAGMENT", "FROM", "SRCOFFS", "REFTYPE", "REFMEMBER");
mdb_printf(" %-20s %?p %8p %-20s %s\n",
}
mdb_printf("\n");
"MARKED", "STATUS", "REACH");
mdb_printf(" %?p %8p %8p %6s %6s %ld\n",
n->tgn_reach);
}
"MARKED", "STATUS", "REACH");
mdb_printf(" %?p %8p %8p %6s %6s %ld\n",
n->tgn_reach);
}
mdb_printf("\n");
return (DCMD_OK);
}
int
{
return (DCMD_USAGE);
if (!typegraph_built())
return (DCMD_ABORT);
/*
* Determine the node corresponding to the passed address.
*/
return (DCMD_ERR);
}
/*
* Now look up the specified type.
*/
return (DCMD_ERR);
}
return (DCMD_OK);
}
/*ARGSUSED*/
int
{
return (DCMD_USAGE);
if (!typegraph_built())
return (DCMD_ABORT);
return (DCMD_ERR);
}
return (DCMD_OK);
}
int
{
return (WALK_NEXT);
}
int
{
continue;
continue;
continue;
break;
}
return (WALK_DONE);
}
int
{
continue;
continue;
continue;
break;
}
return (WALK_DONE);
}
#define FINDLOCKS_DEPTH 32
typedef struct foundlock {
const char *fnd_member[FINDLOCKS_DEPTH];
} foundlock_t;
typedef struct findlocks {
int fl_depth;
} findlocks_t;
/*ARGSUSED*/
static int
{
return (WALK_NEXT);
}
static int
{
static int called = 0;
static mdb_ctf_id_t mutex;
static mdb_ctf_id_t thread;
if (!called) {
mdb_warn("can't find 'kmutex_t' type");
return (1);
}
mdb_warn("can't resolve 'kmutex_t' type");
return (1);
}
mdb_warn("can't find 'kthread_t' type");
return (1);
}
mdb_warn("can't resolve 'kthread_t' type");
return (1);
}
called = 1;
}
if (!mdb_ctf_type_valid(type))
return (0);
if (!mdb_ctf_type_valid(type))
return (0);
if (kind == CTF_K_ARRAY) {
return (0);
if (!mdb_ctf_type_valid(type))
return (0);
/*
* Small optimization: don't bother running through the array
* if we know that we can't process the type.
*/
return (0);
for (i = 0; i < arr.mta_nelems; i++) {
}
return (0);
}
if (kind != CTF_K_STRUCT)
return (0);
if (mdb_pwalk("mutex_owner",
return (0);
}
/*
* Check to see if the owner is a thread.
*/
return (0);
return (0);
if (!mdb_ctf_type_valid(ttype))
return (0);
return (0);
return (0);
nlocks = 1;
}
}
return (0);
}
}
return (0);
}
static void
{
int kind;
if (!mdb_ctf_type_valid(type)) {
if (kind == CTF_K_UNION) {
/*
* Insert disparaging comment about unions here.
*/
return;
}
continue;
/*
* There are multiple interpretations for this
* node; we have to punt.
*/
return;
}
}
}
/*
* We have our type. Now iterate for locks. Note that we don't yet
* deal with locks in flexible array members.
*/
}
} else {
}
if (mdb_ctf_type_valid(type))
return;
continue;
}
}
/*ARGSUSED*/
int
{
size_t i, j;
if (argc != 0)
return (DCMD_USAGE);
if (!typegraph_built())
return (DCMD_ABORT);
if (!(flags & DCMD_ADDRSPEC))
addr = 0;
for (i = 0; i < tg_nnodes; i++) {
}
char buf[MDB_SYM_NAMLEN];
mdb_printf("%p (%s",
sizeof (buf)));
} else {
mdb_printf("%p (%a) is owned by %p\n",
} else {
mdb_printf("%p is owned by %p\n",
}
}
}
mdb_printf("findlocks: nota bene: %slocks may be held",
mdb_printf("\n");
} else {
}
return (DCMD_OK);
}
/*
* ::findfalse: Using type knowledge to detect potential false sharing
*
* In caching SMP systems, memory is kept coherent through bus-based snooping
* protocols. Under these protocols, only a single cache may have a given line
* of memory in a dirty state. If a different cache wishes to write to the
* dirty line, the new cache must first read-to-own the dirty line from the
* owning cache. The size of the line used for coherence (the coherence
* granularity) has an immediate ramification for parallel software: because
* only one cache may own a line at a given time, one wishes to avoid a
* situation where two or more small, disjoint data structures are both
* (a) contained within a single line and (b) accessed in parallel on disjoint
* CPUs. This situation -- so-called "false sharing" -- can induce suboptimal
* scalability in otherwise scalable software.
*
* Historically, one has been able to find false sharing only with some
* combination of keen intuition and good luck. And where false sharing has
* been discovered, it has almost always been after having induced suboptimal
* scaling; one has historically not been able to detect false sharing before
* the fact.
*
* Building on the mechanism for postmortem type information, however, we
 * can -- from a system crash dump -- detect the potentially most egregious
* cases of false sharing. Specifically, after having run through the type
* identification passes described above, we can iterate over all nodes,
* looking for nodes that satisfy the following criteria:
*
* (a) The node is an array. That is, the node was either determined to
* be of type CTF_K_ARRAY, or the node was inferred to be an array in
* pass2 of type identification (described above).
*
* (b) Each element of the array is a structure that is smaller than the
* coherence granularity.
*
* (c) The total size of the array is greater than the coherence granularity.
*
 * (d) Each element of the array is a structure that contains within it a
 *     synchronization primitive (a mutex, readers/writer lock, condition
 *     variable or semaphore). We use the presence of a synchronization
* primitive as a crude indicator that the disjoint elements of the
* array are accessed in parallel.
*
* Any node satisfying these criteria is identified as an object that could
* potentially suffer from false sharing, and the node's address, symbolic
* name (if any), type, type size and total size are provided as output.
*
* While there are some instances of false sharing that do not meet the
* above criteria (e.g., if the synchronization for each element is handled
* in a separate structure, or if the elements are only manipulated with
* atomic memory operations), these criteria yield many examples of false
* sharing without swamping the user with false positives.
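 *
 * As a purely illustrative sketch (the structure and its names are
 * hypothetical), an array like the following would be reported, assuming a
 * 64-byte coherence granularity:
 *
 *	typedef struct frotz {
 *		kmutex_t frotz_lock;
 *		uint32_t frotz_count;
 *	} frotz_t;
 *
 *	frotz_t frotz_table[NFROTZ];
 *
 * Each frotz_t is well under 64 bytes yet contains a lock (criteria (b) and
 * (d)); if the array itself exceeds 64 bytes (criterion (c)), disjoint
 * frotz_t's may share a coherence line.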
*/
#define FINDFALSE_COHERENCE_SIZE 64
/*ARGSUSED*/
static int
void *ignored)
{
int i, kind;
static int called = 0;
static struct {
char *name;
} sync[] = {
{ "kmutex_t" },
{ "krwlock_t" },
{ "kcondvar_t" },
{ "ksema_t" },
{ NULL }
};
if (!called) {
char *name;
called = 1;
return (0);
}
return (0);
}
}
}
/*
* See if this type is any of the synchronization primitives.
*/
if (!mdb_ctf_type_valid(type))
return (0);
/*
* We have a winner!
*/
return (1);
}
}
return (0);
}
if (kind != CTF_K_STRUCT)
return (0);
if (mdb_ctf_member_iter(type,
return (1);
return (0);
}
static void
{
int kind;
if (!mdb_ctf_type_valid(type)) {
if (kind == CTF_K_UNION) {
/*
* Once again, the unions impede progress...
*/
return;
}
continue;
/*
* There are multiple interpretations for this
* node; we have to punt.
*/
return;
}
}
}
if (!mdb_ctf_type_valid(type))
return;
/*
* If this isn't an array (or treated as one), it can't induce false
* sharing. (Or at least, we can't detect it.)
*/
return;
return;
} else {
if (kind != CTF_K_ARRAY)
return;
}
if (kind == CTF_K_ARRAY) {
return;
if (!mdb_ctf_type_valid(type))
return;
}
/*
* If the size is greater than or equal to the cache line size, it's
* not false sharing. (Or at least, the false sharing is benign.)
*/
if (size >= FINDFALSE_COHERENCE_SIZE)
return;
return;
/*
* This looks like it could be a falsely shared structure. If this
* type contains a mutex, rwlock, semaphore or condition variable,
* we're going to report it.
*/
return;
} else {
}
mdb_printf("%-22s %2d %7ld\n",
TG_NODE_SIZE(node));
}
/*ARGSUSED*/
int
{
ssize_t i;
return (DCMD_USAGE);
if (!typegraph_built())
return (DCMD_ABORT);
"SZ", "TOTSIZE");
/*
* We go from the back of the bus and move forward to report false
* sharing in named symbols before reporting false sharing in dynamic
* structures.
*/
for (i = tg_nnodes - 1; i >= 0; i--)
findfalse_node(&tg_node[i]);
return (DCMD_OK);
}