dwarf.c revision ad0b1ea5d69a45fe23c434277599e315f29a5fca
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright 2012 Jason King. All rights reserved.
* Use is subject to license terms.
*/
/*
* DWARF to tdata conversion
*
* For the most part, conversion is straightforward, proceeding in two passes.
* On the first pass, we iterate through every die, creating new type nodes as
* necessary. Referenced tdesc_t's are created in an uninitialized state, thus
* allowing type reference pointers to be filled in. If the tdesc_t
* corresponding to a given die can be completely filled out (sizes and offsets
* calculated, and so forth) without using any referenced types, the tdesc_t is
* marked as resolved. Consider an array type. If the type corresponding to
* the array contents has not yet been processed, we will create a blank tdesc
* for the contents type (only the type ID will be filled in, relying upon the
* later portion of the first pass to encounter and complete the referenced
* type). We will then attempt to determine the size of the array. If the
* array has a byte size attribute, we will have completely characterized the
* array type, and will be able to mark it as resolved. The lack of a byte
* size attribute, on the other hand, will prevent us from fully resolving the
* type, as the size will only be calculable with reference to the contents
* type, which has not, as yet, been encountered. The array type will thus be
* left without the resolved flag, and the first pass will continue.
*
* When we begin the second pass, we will have created tdesc_t nodes for every
* type in the section. We will traverse the tree, from the iidescs down,
* processing each unresolved node. As the referenced nodes will have been
* populated, the array type used in our example above will be able to use the
* size of the referenced types (if available) to determine its own type. The
* traversal will be repeated until all types have been resolved or we have
* failed to make progress. When all tdescs have been resolved, the conversion
* is complete.
*
* There are, as always, a few special cases that are handled during the first
* and second passes:
*
* 1. Empty enums - GCC will occasionally emit an enum without any members.
* Later on in the file, it will emit the same enum type, though this time
* with the full complement of members. All references to the memberless
* enum need to be redirected to the full definition. During the first
* pass, each enum is entered in dw_enumhash, along with a pointer to its
* corresponding tdesc_t. If, during the second pass, we encounter a
* memberless enum, we use the hash to locate the full definition. All
* tdescs referencing the empty enum are then redirected.
*
* 2. Forward declarations - If the compiler sees a forward declaration for
* a structure, followed by the definition of that structure, it will emit
* DWARF data for both the forward declaration and the definition. We need
* to resolve the forward declarations when possible, by redirecting the
* forward-referencing tdescs to the actual struct/union definitions. This
* redirection is done completely within the first pass. We begin by
* recording all forward declarations in dw_fwdhash. When we define a
* structure, we check to see if there have been any corresponding forward
* declarations. If so, we redirect the tdescs which referenced the forward
* declarations to the structure or union definition.
*
* XXX see if a post traverser will allow the elimination of repeated pass 2
* traversals.
*/
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <errno.h>
#include <libelf.h>
#include <libdwarf.h>
#include <libgen.h>
#include <dwarf.h>
#include "ctf_headers.h"
#include "ctftools.h"
#include "memory.h"
#include "list.h"
#include "traverse.h"
/* The version of DWARF which we support. */
#define DWARF_VERSION 2
/*
* We need to define a couple of our own intrinsics, to smooth out some of the
* differences between the GCC and DevPro DWARF emitters. See the referenced
* routines and the special cases in the file comment for more details.
*
* Type IDs are 32 bits wide. We're going to use the top of that field to
* indicate types that we've created ourselves.
*/
/*
* To reduce the staggering amount of error-handling code that would otherwise
* be required, the attribute-retrieval routines handle most of their own
* errors. If the following flag is supplied as the value of the `req'
* argument, they will also handle the absence of a requested attribute by
* terminating the program.
*/
/* Value passed as the `req' argument to mark an attribute as mandatory. */
#define DW_ATTR_REQ 1
/*
* Bucket count for hash tables.
* NOTE(review): presumably used when creating the tdesc-related hashes;
* confirm against the hash creation call sites.
*/
#define TDESC_HASH_BUCKETS 511
/*
* Context describing the DWARF data being processed. Routines in this file
* receive it as `dw'.
*/
typedef struct dwarf {
char *dw_cuname; /* name of compilation unit */
} dwarf_t;
static tid_t
{
return (++dw->dw_mfgtid_last);
}
static void
{
}
static tdesc_t *
{
return (tdp);
else
return (NULL);
}
/*
* Resolve a tdesc down to a node which should have a size. Returns the size,
* zero if the size hasn't yet been determined.
*/
static size_t
{
for (;;) {
case INTRINSIC:
case POINTER:
case ARRAY:
case FUNCTION:
case STRUCT:
case UNION:
case ENUM:
case FORWARD:
return (0);
case TYPEDEF:
case VOLATILE:
case CONST:
case RESTRICT:
continue;
case 0: /* not yet defined */
return (0);
default:
terminate("tdp %u: tdesc_size on unknown type %d\n",
}
}
}
static size_t
{
for (;;) {
case INTRINSIC:
case ARRAY:
case FUNCTION:
case STRUCT:
case UNION:
case ENUM:
case POINTER:
case FORWARD:
return (0);
case TYPEDEF:
case VOLATILE:
case RESTRICT:
case CONST:
continue;
case 0: /* not yet defined */
return (0);
default:
terminate("tdp %u: tdesc_bitsize on unknown type %d\n",
}
}
}
static tdesc_t *
{
for (;;) {
case TYPEDEF:
case VOLATILE:
case RESTRICT:
case CONST:
break;
case 0: /* not yet defined */
return (NULL);
default:
return (tdp);
}
}
}
static Dwarf_Off
{
return (off);
terminate("failed to get offset for die: %s\n",
/*NOTREACHED*/
return (0);
}
static Dwarf_Die
{
int rc;
return (sib);
else if (rc == DW_DLV_NO_ENTRY)
return (NULL);
terminate("die %llu: failed to find type sibling: %s\n",
/*NOTREACHED*/
return (NULL);
}
static Dwarf_Die
{
int rc;
return (child);
else if (rc == DW_DLV_NO_ENTRY)
return (NULL);
terminate("die %llu: failed to find type child: %s\n",
/*NOTREACHED*/
return (NULL);
}
static Dwarf_Half
{
return (tag);
terminate("die %llu: failed to get tag for type: %s\n",
/*NOTREACHED*/
return (0);
}
static Dwarf_Attribute
{
int rc;
return (attr);
} else if (rc == DW_DLV_NO_ENTRY) {
if (req) {
name);
} else {
return (NULL);
}
}
terminate("die %llu: failed to get attribute for type: %s\n",
/*NOTREACHED*/
return (NULL);
}
static Dwarf_Half
{
return (form);
terminate("failed to get attribute form for type: %s\n",
/*NOTREACHED*/
return (0);
}
/*
* The following functions look up the value of an attribute in a DIE:
*
* die_signed
* die_unsigned
* die_bool
* die_string
*
* They all take the same parameters (with the exception of valp, which is
* a pointer to the type of the attribute we are looking up):
*
* dw - the dwarf object to look in
* die - the DIE we're interested in
* name - the name of the attribute to look up
* valp - pointer to where the value of the attribute is placed
* req - whether the value is required (0 / non-zero)
*
* If the attribute is not found, one of the following happens:
* - program terminates (req is non-zero)
* - function returns 0 (req is zero)
*
* If the value is found, and in a form (class) we can handle, the function
* returns 1.
*
* Currently, we can only handle attribute values that are stored as
* constants (immediate value). If an attribute has a form we cannot
* handle (for example, VLAs may store the dimensions of the array
* as a DWARF expression that can compute it at runtime by reading
* values off the stack or other locations in memory), it is treated
* the same as if the attribute does not exist.
*/
static int
int req)
{
return (0); /* die_attr will terminate for us if necessary */
if (req == 0)
return (0);
terminate("die %llu: failed to get signed (form 0x%x)\n",
}
return (1);
}
static int
int req)
{
return (0); /* die_attr will terminate for us if necessary */
if (req == 0)
return (0);
terminate("die %llu: failed to get unsigned (form 0x%x)\n",
}
return (1);
}
static int
{
return (0); /* die_attr will terminate for us if necessary */
if (req == 0)
return (0);
terminate("die %llu: failed to get bool (form 0x%x)\n",
}
return (1);
}
static int
{
char *str;
return (0); /* die_attr will terminate for us if necessary */
if (req == 0)
return (0);
terminate("die %llu: failed to get string (form 0x%x)\n",
}
return (1);
}
static Dwarf_Off
{
terminate("die %llu: failed to get ref (form 0x%x)\n",
}
return (off);
}
static char *
{
return (str);
}
static int
{
}
static int
{
/*
* Some compilers (gcc) use DW_AT_external to indicate function
* visibility. Others (Sun) use DW_AT_visibility.
*/
return (vis == DW_VIS_exported);
else
}
static tdesc_t *
{
return (tdp);
}
static tdesc_t *
{
return (tdp);
}
static int
{
return (0); /* die_attr will terminate for us if necessary */
terminate("die %llu: failed to get mem offset location list\n",
}
terminate("die %llu: cannot parse member offset\n",
}
return (1);
}
static tdesc_t *
{
return (tdp);
}
/*
* Manufacture a void type. Used for gcc-emitted DWARF, where the lack of a
* type reference implies a reference to a void type. A void *, for example,
* will be represented by a pointer die without a DW_AT_type. CTF requires
* that pointer nodes point to something, so we'll create a void for use as
* the target. Note that the DWARF data may already create a void type. Ours
* would then be a duplicate, but it'll be removed in the self-uniquification
* merge performed at the completion of DWARF->tdesc conversion.
*/
static tdesc_t *
{
}
static tdesc_t *
{
}
}
/*
* Used for creating bitfield types. We create a copy of an existing intrinsic,
* adjusting the size of the copy to match what the caller requested. The
* caller can then use the copy as the type for a bitfield structure member.
*/
static tdesc_t *
{
terminate("tdp %u: attempt to make a bit field from an "
}
return (new);
}
static void
{
} else {
terminate("die %llu: unexpected non-subrange node in array\n",
}
/*
* Array bounds can be signed or unsigned, but there are several kinds
* of signless forms (data1, data2, etc) that take their sign from the
* routine that is trying to interpret them. That is, data1 can be
* either signed or unsigned, depending on whether you use the signed or
* unsigned accessor function. GCC will use the signless forms to store
* unsigned values which have their high bit set, so we need to try to
* read them first as unsigned to get positive values. We could also
* try signed first, falling back to unsigned if we got a negative
* value.
*/
else
/*
* Different compilers use different index types. Force the type to be
* a common, known value (long).
*/
}
}
/*
* Create a tdesc from an array node. Some arrays will come with byte size
* attributes, and thus can be resolved immediately. Others don't, and will
* need to wait until the second pass for resolution.
*/
static void
{
int flags;
/*
* Ensure that sub-dimensions have sizes too before marking
* as resolved.
*/
flags = 0;
break;
}
}
}
}
/*ARGSUSED1*/
static int
{
return (1);
return (1);
}
return (1);
}
/*ARGSUSED1*/
static int
{
return (1);
return (1);
}
/*
* Most enums (those with members) will be resolved during this first pass.
* Others - those without members (see the file comment) - won't be, and will
* need to wait until the second pass when they can be matched with their full
* definitions.
*/
static void
{
do {
/* Nested type declaration */
continue;
}
&uval, 0)) {
} else {
terminate("die %llu: enum %llu: member without "
}
}
}
}
static int
{
return (-1); /* stop the iteration */
}
return (0);
}
/*ARGSUSED1*/
static int
{
return (1);
/*
* The answer to this one won't change from iteration to iteration,
* so don't even try.
*/
tdesc_name(tdp));
}
return (1);
}
static int
{
return (0);
}
/*
* Structures and unions will never be resolved during the first pass, as we
* won't be able to fully determine the member sizes. The second pass, which
* has access to sizing information, will be able to complete the resolution.
*/
static void
{
tdesc_name(tdp));
return;
}
/*
* GCC allows empty SOUs as an extension.
*/
goto out;
do {
if (tag != DW_TAG_member) {
/* Nested type declaration */
continue;
}
/*
* an empty name, even though nothing can really handle them
* properly. Note that some versions of GCC miss out debug
* info for anon structs, though recent versions are fixed (gcc
* bug 11816).
*/
&mloff, 0)) {
}
else
#ifdef _BIG_ENDIAN
#else
#endif
}
/*
* GCC will attempt to eliminate unused types, thus decreasing the
* size of the emitted dwarf. That is, if you declare a foo_t in your
* header, include said header in your source file, and neglect to
* actually use (directly or indirectly) the foo_t in the source file,
* the foo_t won't make it into the emitted DWARF. So, at least, goes
* the theory.
*
* Occasionally, it'll emit the DW_TAG_structure_type for the foo_t,
* and then neglect to emit the members. Strangely, the loner struct
* tag will always be followed by a proper nested declaration of
* something else. This is clearly a bug, but we're not going to have
* time to get it fixed before this goo goes back, so we'll have to work
* around it. If we see a no-membered struct with a nested declaration
* (i.e. die_child of the struct tag won't be null), we'll ignore it.
* Being paranoid, we won't simply remove it from the hash. Instead,
* we'll decline to create an iidesc for it, thus ensuring that this
* type won't make it into the output file. To be safe, we'll also
* change the name.
*/
return;
}
out:
}
}
static void
{
}
static void
{
}
/*ARGSUSED1*/
static int
{
return (1);
continue;
/*
* For empty members, or GCC/C99 flexible array
* members, a size of 0 is correct.
*/
continue;
continue;
return (1);
}
return (1);
}
/*
* This member is a bitfield, and needs to reference
* an intrinsic type with the same width. If the
* currently-referenced type isn't of the same width,
* we'll copy it, adjusting the width of the copy to
* the size we'd like.
*/
}
}
return (1);
}
/*ARGSUSED1*/
static int
{
return (1);
}
}
return (1);
}
static void
{
int i;
/*
* We'll begin by processing any type definition nodes that may be
* lurking underneath this one.
*/
/* Nested type declaration */
}
}
/*
* This is a prototype. We don't add prototypes to the
* tree, so we're going to drop the tdesc. Unfortunately,
* it has already been added to the tree. Nobody will reference
* it, though, and it will be leaked.
*/
return;
}
} else {
}
/*
* Count the arguments to the function, then read them in.
*/
else if (tag == DW_TAG_unspecified_parameters &&
}
continue;
}
}
}
/*
* GCC and DevPro use different names for the base types. While the terms are
* the same, they are arranged in a different order. Some terms, such as int,
* are implied in one, and explicitly named in the other. Given a base type
* as input, this routine will return a common name, along with an intr_t
* that reflects said name.
*/
static intr_t *
{
char buf[100];
char *base, *c;
int sign = 1;
char fmt = '\0';
if (strcmp(c, "signed") == 0)
sign = 1;
else if (strcmp(c, "unsigned") == 0)
sign = 0;
else if (strcmp(c, "long") == 0)
nlong++;
else if (strcmp(c, "char") == 0) {
nchar++;
fmt = 'c';
} else if (strcmp(c, "short") == 0)
nshort++;
else if (strcmp(c, "int") == 0)
nint++;
else {
/*
* If we don't recognize any of the tokens, we'll tell
* the caller to fall back to the dwarf-provided
* encoding information.
*/
return (NULL);
}
}
return (NULL);
if (nchar > 0) {
return (NULL);
base = "char";
} else if (nshort > 0) {
if (nlong > 0)
return (NULL);
base = "short";
} else if (nlong > 0) {
base = "long";
} else {
base = "int";
}
base);
return (intr);
}
typedef struct fp_size_map {
static const fp_size_map_t fp_encodings[] = {
#ifdef __sparc
#else
#endif
{ { 0, 0 } }
};
static uint_t
{
if (enc == DW_ATE_complex_float) {
mult = 2;
col = 1;
} else if (enc == DW_ATE_imaginary_float ||
col = 2;
map++;
}
/*NOTREACHED*/
return (0);
}
static intr_t *
{
switch (enc) {
case DW_ATE_unsigned:
case DW_ATE_address:
break;
case DW_ATE_unsigned_char:
break;
case DW_ATE_signed:
break;
case DW_ATE_signed_char:
break;
case DW_ATE_boolean:
break;
case DW_ATE_float:
case DW_ATE_complex_float:
case DW_ATE_imaginary_float:
break;
default:
terminate("die %llu: unknown base type encoding 0x%llx\n",
}
return (intr);
}
static void
{
char *new;
/*
* The compilers have their own clever (internally inconsistent) ideas
* as to what base types should look like. Sometimes gcc will, for
* example, use DW_ATE_signed_char for char. Other times, however, it
* will use DW_ATE_signed. Needless to say, this causes some problems
* down the road, particularly with merging. We do, however, use the
* DWARF idea of type sizes, as this allows us to avoid caring about
* the data model.
*/
/* XXX make a name parser for float too */
/* Found it. We'll use the parsed version */
} else {
/*
* We didn't recognize the type, so we'll create an intr_t
* based on the DWARF data.
*/
tdesc_name(tdp));
}
}
static void
{
} else {
}
}
}
static void
{
}
static void
{
}
static void
{
}
static void
{
}
static void
{
}
/*ARGSUSED3*/
static void
{
char *name;
/*
* We'll begin by processing any type definition nodes that may be
* lurking underneath this one.
*/
tag != DW_TAG_variable) {
/* Nested type declaration */
}
}
/*
* We process neither prototypes nor subprograms without
* names.
*/
return;
}
else
char *name;
continue;
terminate("die %llu: func arg %d has no name\n",
}
continue;
}
}
int i;
continue;
}
}
}
/*ARGSUSED3*/
static void
{
char *name;
return; /* skip prototypes and nameless objects */
}
/*ARGSUSED2*/
static int
{
return (1);
tdesc_name(fwd));
}
return (1);
}
/*ARGSUSED*/
static void
{
}
/*
* Used to map the die to a routine which can parse it, using the tag to do the
* mapping. While the processing of most tags entails the creation of a tdesc,
* there are a few which don't - primarily those which result in the creation of
* iidescs which refer to existing tdescs.
*/
typedef struct die_creator {
/*
* Table associating DWARF type tags with the routine that handles a die
* carrying that tag. Each row lists the tag, a second field that is 0 for
* every row shown here, and the creation routine. The final row is all
* zeroes with a NULL routine.
* NOTE(review): the all-zero row presumably acts as the end-of-table
* sentinel for the lookup loop; confirm against the lookup routine.
*/
static const die_creator_t die_creators[] = {
{ DW_TAG_array_type, 0, die_array_create },
{ DW_TAG_enumeration_type, 0, die_enum_create },
{ DW_TAG_pointer_type, 0, die_pointer_create },
{ DW_TAG_structure_type, 0, die_struct_create },
{ DW_TAG_subroutine_type, 0, die_funcptr_create },
{ DW_TAG_typedef, 0, die_typedef_create },
{ DW_TAG_union_type, 0, die_union_create },
{ DW_TAG_base_type, 0, die_base_create },
{ DW_TAG_const_type, 0, die_const_create },
{ DW_TAG_volatile_type, 0, die_volatile_create },
{ DW_TAG_restrict_type, 0, die_restrict_create },
{ 0, 0, NULL } /* all-zero final row */
};
static const die_creator_t *
{
const die_creator_t *dc;
return (dc);
}
return (NULL);
}
static void
{
const die_creator_t *dc;
}
return;
}
}
}
static void
{
do {
}
/*
* Resolver callbacks, one slot per node kind in the order given by the
* per-slot labels (the first slot is unlabelled). Only arrays, structs and
* unions (which share die_sou_resolve), enums and forwards have a resolver;
* every other slot is NULL.
* NOTE(review): presumably indexed by tdesc type value and handed to the
* type-tree traversal; the slot order must then match the type numbering
* in the project headers - confirm before reordering.
*/
static tdtrav_cb_f die_resolvers[] = {
NULL,
NULL, /* intrinsic */
NULL, /* pointer */
die_array_resolve, /* array */
NULL, /* function */
die_sou_resolve, /* struct */
die_sou_resolve, /* union */
die_enum_resolve, /* enum */
die_fwd_resolve, /* forward */
NULL, /* typedef */
NULL, /* typedef unres */
NULL, /* volatile */
NULL, /* const */
NULL, /* restrict */
};
/*
* Failure-reporting callbacks, laid out slot-for-slot like die_resolvers
* (same labels, same order). Only arrays and structs/unions (which share
* die_sou_failed) have a reporter; every other slot is NULL.
* NOTE(review): presumably walked over the type tree when resolution stops
* making progress, to name the types that could not be resolved - confirm
* against the caller.
*/
static tdtrav_cb_f die_fail_reporters[] = {
NULL,
NULL, /* intrinsic */
NULL, /* pointer */
die_array_failed, /* array */
NULL, /* function */
die_sou_failed, /* struct */
die_sou_failed, /* union */
NULL, /* enum */
NULL, /* forward */
NULL, /* typedef */
NULL, /* typedef unres */
NULL, /* volatile */
NULL, /* const */
NULL, /* restrict */
};
static void
{
int last = -1;
int pass = 0;
do {
pass++;
"types:\n", progname);
terminate("failed to resolve types\n");
}
}
/*
* Any object containing a function or object symbol at any scope should also
* contain DWARF data.
*/
static boolean_t
{
break;
}
}
if (!found)
terminate("cannot convert stripped objects\n");
char *name;
/* Studio emits these local symbols regardless */
return (B_TRUE);
}
}
return (B_FALSE);
}
/*ARGSUSED*/
int
{
int rc;
if (should_have_dwarf(elf)) {
return (-1);
} else {
return (0);
}
/*
* There's no type data in the DWARF section, but
* libdwarf is too clever to handle that properly.
*/
return (0);
}
terminate("failed to initialize DWARF: %s\n",
}
terminate("file does not contain valid DWARF data: %s\n",
/*
* Some compilers emit no DWARF for empty files, others emit an empty
* compilation unit.
*/
should_have_dwarf(elf)) {
terminate("file does not contain dwarf type data "
"(try compiling with -g)\n");
return (0);
}
terminate("file contains too many types\n");
if (vers != DWARF_VERSION) {
terminate("file contains incompatible version %d DWARF code "
"(version 2 required)\n", vers);
}
}
}
terminate("multiple compilation units not supported\n");
die_resolve(&dw);
/* leak the dwarf_t */
return (0);
}