dis_target.c revision 23a1ccea6aac035f084a7a4cdc968687d1b02daf
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * CDDL HEADER START
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * The contents of this file are subject to the terms of the
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Common Development and Distribution License (the "License").
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * You may not use this file except in compliance with the License.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * See the License for the specific language governing permissions
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * and limitations under the License.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * When distributing Covered Code, include this CDDL HEADER in each
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * If applicable, add the following below this CDDL HEADER, with the
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * fields enclosed by brackets "[]" replaced with your own identifying
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * information: Portions Copyright [yyyy] [name of copyright owner]
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * CDDL HEADER END
23a1ccea6aac035f084a7a4cdc968687d1b02dafRoger A. Faulkner * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Standard ELF disassembler target.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * We only support disassembly of ELF files, though this target interface could
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * be extended in the future. Each basic type (target, func, section) contains
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * enough information to uniquely identify the location within the file. The
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * interfaces use libelf(3LIB) to do the actual processing of the file.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Symbol table entry type. We maintain our own symbol table sorted by address,
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * with the symbol name already resolved against the ELF symbol table.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrocktypedef struct sym_entry {
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock int se_shndx; /* section where symbol is located */
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Target data structure. This structure keeps track of the ELF file
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * information, a few bits of pre-processed section index information, and
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * sorted versions of the symbol table. We also keep track of the last symbol
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * looked up, as the majority of lookups remain within the same symbol.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock Elf *dt_elf_root; /* main libelf handle (for archives) */
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock size_t dt_shstrndx; /* section index of .shstrtab */
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock size_t dt_symidx; /* section index of symbol table */
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock sym_entry_t *dt_symcache; /* last symbol looked up */
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock struct dis_tgt *dt_next; /* next target (for archives) */
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock Elf_Arhdr *dt_arhdr; /* archive header (for archives) */
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Function data structure. We resolve the symbol and lookup the associated ELF
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * data when building this structure. The offset is calculated based on the
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * section's starting address.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Section data structure. We store the entire section header so that we can
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * determine some properties (such as whether or not it contains text) after
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * building the structure.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock const char *ds_name;
23a1ccea6aac035f084a7a4cdc968687d1b02dafRoger A. Faulkner/* Lifted from Psymtab.c, omitting STT_TLS */
23a1ccea6aac035f084a7a4cdc968687d1b02dafRoger A. Faulkner ((1 << STT_OBJECT) | (1 << STT_FUNC) | (1 << STT_COMMON))
/*
 * Evaluates to 1 when the bit numbered 'tp' is set in the DATA_TYPES mask,
 * and to 0 otherwise. 'tp' is used as a shift count on an int, so it must be
 * a small non-negative value; the visible caller passes the result of
 * GELF_ST_TYPE() on a symbol's st_info.
 */
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock#define IS_DATA_TYPE(tp) (((1 << (tp)) & DATA_TYPES) != 0)
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Pick out the best symbol table to use based on the sections available in the
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * target. We prefer SHT_SYMTAB over SHT_DYNSYM.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock/* ARGSUSED */
dc0093f44ee4fac928e006850f8ed53f68277af5eschrockget_symtab(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Prefer SHT_SYMTAB over SHT_DYNSYM
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock if (scn->ds_shdr.sh_type == SHT_DYNSYM && tgt->dt_symidx == 0)
dc0093f44ee4fac928e006850f8ed53f68277af5eschrocksym_compare(const void *a, const void *b)
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return (-1);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return (1);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Prefer functions over non-functions
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock if (GELF_ST_TYPE(syma->se_sym.st_info) == STT_FUNC)
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return (-1);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock if (GELF_ST_TYPE(symb->se_sym.st_info) == STT_FUNC)
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return (1);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * For symbols with the same address and type, we sort them according to
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * a hierarchy:
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * 1. weak symbols (common name)
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * 2. global symbols (external name)
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * 3. local symbols
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock if (GELF_ST_BIND(syma->se_sym.st_info) == STB_WEAK)
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return (-1);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock if (GELF_ST_BIND(symb->se_sym.st_info) == STB_WEAK)
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return (1);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock if (GELF_ST_BIND(syma->se_sym.st_info) == STB_GLOBAL)
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return (-1);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock if (GELF_ST_BIND(symb->se_sym.st_info) == STB_GLOBAL)
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return (1);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * As a last resort, if we have multiple symbols of the same type at the
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * same address, prefer the version with the fewest leading underscores.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return (-1);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return (1);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return (-1);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return (1);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Prefer the symbol with the smaller size.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return (-1);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return (1);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * We really do have two identical symbols for some reason. Just report
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * them as equal, and to the lucky one go the spoils.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return (0);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Construct an optimized symbol table sorted by starting address.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock int nsym = 0; /* count of symbols we're not interested in */
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Find the symshndx section, if any
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock for (scn = elf_nextscn(tgt->dt_elf, NULL); scn != NULL;
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock if ((scn = elf_getscn(tgt->dt_elf, tgt->dt_symidx)) == NULL)
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock die("%s: failed to get section information", tgt->dt_filename);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock die("%s: failed to get section header", tgt->dt_filename);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock die("%s: symbol table has zero size", tgt->dt_filename);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock die("%s: failed to get symbol table", tgt->dt_filename);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock tgt->dt_symcount = symdata->d_size / gelf_fsize(tgt->dt_elf, ELF_T_SYM,
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock p_symtab = safe_malloc(tgt->dt_symcount * sizeof (sym_entry_t));
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock for (i = 0, sym = p_symtab; i < tgt->dt_symcount; i++) {
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock if (gelf_getsym(symdata, i, &(sym->se_sym)) == NULL) {
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * We're only interested in data symbols.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock if (!IS_DATA_TYPE(GELF_ST_TYPE(sym->se_sym.st_info))) {
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock if (sym->se_sym.st_shndx == SHN_XINDEX && symshndx != NULL) {
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock if ((sym->se_name = elf_strptr(tgt->dt_elf, shdr.sh_link,
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock qsort(tgt->dt_symtab, tgt->dt_symcount, sizeof (sym_entry_t),
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Create a target backed by an ELF file.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock elf_begin(tgt->dt_fd, ELF_C_READ, NULL)) == NULL) {
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock while ((elf = elf_begin(tgt->dt_fd, cmd, tgt->dt_elf_root)) != NULL) {
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Make sure that this Elf file is sane
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * For archives, we drive on in the face of bad
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * members. The "/" and "//" members are
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * special, and should be silently ignored.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * If we're seeing a new Elf object, then we have an
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * archive. In this case, we create a new target, and chain it
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * off the master target. We can later iterate over these
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * targets using dis_tgt_next().
62b628a68db596a2d75a316dc7ffef658079231fAli Bahrami if (elf_getshdrstrndx(elf, &current->dt_shstrndx) == -1) {
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Final sanity check. If we had an archive with no members, then bail
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * out with a nice message.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Return the filename associated with the target.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrockconst char *
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Return the archive member name, if any.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrockconst char *
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Return the Elf_Ehdr associated with this target. Needed to determine which
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * disassembler to use.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Return the next target in the list, if this is an archive.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Destroy a target and free up any associated memory.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Given an address, returns the name of the corresponding symbol, as well as
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * the offset within that symbol. If no matching symbol is found, then NULL is
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * returned.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * If 'cache_result' is specified, then we keep track of the resulting symbol.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * This cached result is consulted first on subsequent lookups in order to avoid
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * unnecessary lookups. This flag should be used for resolving the current PC,
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * as the majority of addresses stay within the current function.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrockconst char *
dc0093f44ee4fac928e006850f8ed53f68277af5eschrockdis_tgt_lookup(dis_tgt_t *tgt, uint64_t addr, off_t *offset, int cache_result,
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock *offset = addr - tgt->dt_symcache->se_sym.st_value;
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock addr < sym->se_sym.st_value + sym->se_sym.st_size &&
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock (!found || sym->se_sym.st_value > osym->se_sym.st_value)) {
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Particularly for .plt objects, it's possible to have
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * a zero sized object. We want to return this, but we
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * want it to be a last resort.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Walk backwards to find the best match.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock } while ((sym->se_sym.st_value == osym->se_sym.st_value) &&
7a65609ec233a8e7a8ea8ec9c0476d86cdbc92ebjmcp (addr < osym->se_sym.st_value + osym->se_sym.st_size));
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock *isfunc = (GELF_ST_TYPE(sym->se_sym.st_info) == STT_FUNC);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Given an address, return the starting offset of the next symbol in the file.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Relies on the fact that this is only used when we encounter a bad instruction
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * in the input stream, so we know that the last symbol looked up will be in the
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock /* make sure the cached symbol and address are valid */
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock addr >= sym->se_sym.st_value + sym->se_sym.st_size)
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return (0);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock /* find the next symbol */
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Iterate over all sections in the target, executing the given callback for
dc0093f44ee4fac928e006850f8ed53f68277af5eschrockdis_tgt_section_iter(dis_tgt_t *tgt, section_iter_f func, void *data)
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock for (scn = elf_nextscn(tgt->dt_elf, NULL), idx = 1; scn != NULL;
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock if ((sdata.ds_name = elf_strptr(tgt->dt_elf, tgt->dt_shstrndx,
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock if ((sdata.ds_data = elf_getdata(scn, NULL)) == NULL) {
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Return 1 if the given section contains text, 0 otherwise.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock (scn->ds_shdr.sh_flags == (SHF_ALLOC | SHF_EXECINSTR)));
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Return a pointer to the section data.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Return the size of the section data.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Return the address for the given section.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Return the name of the current section.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrockconst char *
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Create an allocated copy of the given section
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Free section memory
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Iterate over all functions in the target, executing the given callback for
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * each one.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrockdis_tgt_function_iter(dis_tgt_t *tgt, function_iter_f func, void *data)
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock for (i = 0, sym = tgt->dt_symtab; i < tgt->dt_symcount; i++, sym++) {
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock /* ignore non-functions */
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock if ((GELF_ST_TYPE(sym->se_sym.st_info) != STT_FUNC) ||
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock /* get the ELF data associated with this function */
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock if ((scn = elf_getscn(tgt->dt_elf, sym->se_shndx)) == NULL ||
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Verify that the address lies within the section that we think
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock df.df_offset = sym->se_sym.st_value - shdr.sh_addr;
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Return the data associated with a given function.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock return ((char *)func->df_data->d_buf + func->df_offset);
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Return the size of a function.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Return the address of a function.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Return the name of the function
dc0093f44ee4fac928e006850f8ed53f68277af5eschrockconst char *
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Return a copy of a function.
dc0093f44ee4fac928e006850f8ed53f68277af5eschrock * Free function memory