dis_main.c revision f7184619589931c4b827180c213074c470f08a8f
2N/A/*
2N/A * CDDL HEADER START
2N/A *
2N/A * The contents of this file are subject to the terms of the
2N/A * Common Development and Distribution License (the "License").
2N/A * You may not use this file except in compliance with the License.
2N/A *
2N/A * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
2N/A * or http://www.opensolaris.org/os/licensing.
2N/A * See the License for the specific language governing permissions
2N/A * and limitations under the License.
2N/A *
2N/A * When distributing Covered Code, include this CDDL HEADER in each
2N/A * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
2N/A * If applicable, add the following below this CDDL HEADER, with the
2N/A * fields enclosed by brackets "[]" replaced with your own identifying
2N/A * information: Portions Copyright [yyyy] [name of copyright owner]
2N/A *
2N/A * CDDL HEADER END
2N/A */
2N/A
2N/A/*
2N/A * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
2N/A * Use is subject to license terms.
2N/A *
2N/A * Copyright 2011 Jason King. All rights reserved.
2N/A * Copyright 2012 Joshua M. Clulow <josh@sysmgr.org>
2N/A */
2N/A
2N/A#include <ctype.h>
2N/A#include <getopt.h>
2N/A#include <stdio.h>
2N/A#include <stdlib.h>
2N/A#include <string.h>
2N/A#include <sys/sysmacros.h>
2N/A#include <sys/elf_SPARC.h>
2N/A
2N/A#include <libdisasm.h>
2N/A
2N/A#include "dis_target.h"
2N/A#include "dis_util.h"
2N/A#include "dis_list.h"
2N/A
2N/Aint g_demangle; /* Demangle C++ names */
2N/Aint g_quiet; /* Quiet mode */
2N/Aint g_numeric; /* Numeric mode */
2N/Aint g_flags; /* libdisasm language flags */
2N/Aint g_doall; /* true if no functions or sections were given */
2N/A
2N/Adis_namelist_t *g_funclist; /* list of functions to disassemble, if any */
2N/Adis_namelist_t *g_seclist; /* list of sections to disassemble, if any */
2N/A
/*
 * Section request types, stored alongside each section name given on the
 * command line and dispatched on by dis_named_section():
 *
 *	-d	DIS_DATA_RELATIVE	dump data, addresses starting at 0
 *	-D	DIS_DATA_ABSOLUTE	dump data, addresses from the section
 *	-t	DIS_TEXT		disassemble the section as text
 */
#define	DIS_DATA_RELATIVE	1
#define	DIS_DATA_ABSOLUTE	2
#define	DIS_TEXT		3
2N/A
2N/A/*
2N/A * libdisasm callback data. Keeps track of current data (function or section)
2N/A * and offset within that data.
2N/A */
2N/Atypedef struct dis_buffer {
2N/A dis_tgt_t *db_tgt; /* current dis target */
2N/A void *db_data; /* function or section data */
2N/A uint64_t db_addr; /* address of function start */
2N/A size_t db_size; /* size of data */
2N/A uint64_t db_nextaddr; /* next address to be read */
2N/A} dis_buffer_t;
2N/A
#define	MINSYMWIDTH	22	/* Minimum width of symbol portion of line */
2N/A
2N/A/*
2N/A * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately
2N/A * formatted symbol, based on the offset and current setttings.
2N/A */
2N/Avoid
2N/Agetsymname(uint64_t addr, const char *symbol, off_t offset, char *buf,
2N/A size_t buflen)
2N/A{
2N/A if (symbol == NULL || g_numeric) {
2N/A if (g_flags & DIS_OCTAL)
2N/A (void) snprintf(buf, buflen, "0%llo", addr);
2N/A else
2N/A (void) snprintf(buf, buflen, "0x%llx", addr);
2N/A } else {
2N/A if (g_demangle)
2N/A symbol = dis_demangle(symbol);
2N/A
2N/A if (offset == 0)
2N/A (void) snprintf(buf, buflen, "%s", symbol);
2N/A else if (g_flags & DIS_OCTAL)
2N/A (void) snprintf(buf, buflen, "%s+0%o", symbol, offset);
2N/A else
2N/A (void) snprintf(buf, buflen, "%s+0x%x", symbol, offset);
2N/A }
2N/A}
2N/A
2N/A/*
2N/A * Determine if we are on an architecture with fixed-size instructions,
2N/A * and if so, what size they are.
2N/A */
2N/Astatic int
2N/Ainsn_size(dis_handle_t *dhp)
2N/A{
2N/A int min = dis_min_instrlen(dhp);
2N/A int max = dis_max_instrlen(dhp);
2N/A
2N/A if (min == max)
2N/A return (min);
2N/A
2N/A return (0);
2N/A}
2N/A
2N/A/*
2N/A * The main disassembly routine. Given a fixed-sized buffer and starting
2N/A * address, disassemble the data using the supplied target and libdisasm handle.
2N/A */
2N/Avoid
2N/Adis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data,
2N/A size_t datalen)
2N/A{
2N/A dis_buffer_t db = { 0 };
2N/A char buf[BUFSIZE];
2N/A char symbuf[BUFSIZE];
2N/A const char *symbol;
2N/A const char *last_symbol;
2N/A off_t symoffset;
2N/A int i;
2N/A int bytesperline;
2N/A size_t symsize;
2N/A int isfunc;
2N/A size_t symwidth = 0;
2N/A int ret;
2N/A int insz = insn_size(dhp);
2N/A
2N/A db.db_tgt = tgt;
2N/A db.db_data = data;
2N/A db.db_addr = addr;
2N/A db.db_size = datalen;
2N/A
2N/A dis_set_data(dhp, &db);
2N/A
2N/A if ((bytesperline = dis_max_instrlen(dhp)) > 6)
2N/A bytesperline = 6;
2N/A
2N/A symbol = NULL;
2N/A
2N/A while (addr < db.db_addr + db.db_size) {
2N/A
2N/A ret = dis_disassemble(dhp, addr, buf, BUFSIZE);
2N/A if (ret != 0 && insz > 0) {
2N/A /*
2N/A * Since we know instructions are fixed size, we
2N/A * always know the address of the next instruction
2N/A */
2N/A (void) snprintf(buf, sizeof (buf),
2N/A "*** invalid opcode ***");
2N/A db.db_nextaddr = addr + insz;
2N/A
2N/A } else if (ret != 0) {
2N/A off_t next;
2N/A
2N/A (void) snprintf(buf, sizeof (buf),
2N/A "*** invalid opcode ***");
2N/A
2N/A /*
2N/A * On architectures with variable sized instructions
2N/A * we have no way to figure out where the next
2N/A * instruction starts if we encounter an invalid
2N/A * instruction. Instead we print the rest of the
2N/A * instruction stream as hex until we reach the
2N/A * next valid symbol in the section.
2N/A */
2N/A if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) {
2N/A db.db_nextaddr = db.db_addr + db.db_size;
2N/A } else {
2N/A if (next > db.db_size)
2N/A db.db_nextaddr = db.db_addr +
2N/A db.db_size;
2N/A else
2N/A db.db_nextaddr = addr + next;
2N/A }
2N/A }
2N/A
2N/A /*
2N/A * Print out the line as:
2N/A *
2N/A * address: bytes text
2N/A *
2N/A * If there are more than 6 bytes in any given instruction,
2N/A * spread the bytes across two lines. We try to get symbolic
2N/A * information for the address, but if that fails we print out
2N/A * the numeric address instead.
2N/A *
2N/A * We try to keep the address portion of the text aligned at
2N/A * MINSYMWIDTH characters. If we are disassembling a function
2N/A * with a long name, this can be annoying. So we pick a width
2N/A * based on the maximum width that the current symbol can be.
2N/A * This at least produces text aligned within each function.
2N/A */
2N/A last_symbol = symbol;
2N/A symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize,
2N/A &isfunc);
2N/A if (symbol == NULL) {
2N/A symbol = dis_find_section(tgt, addr, &symoffset);
2N/A symsize = symoffset;
2N/A }
2N/A
2N/A if (symbol != last_symbol)
2N/A getsymname(addr, symbol, symsize, symbuf,
2N/A sizeof (symbuf));
2N/A
2N/A symwidth = MAX(symwidth, strlen(symbuf));
2N/A getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf));
2N/A
2N/A /*
2N/A * If we've crossed a new function boundary, print out the
2N/A * function name on a blank line.
2N/A */
2N/A if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc)
2N/A (void) printf("%s()\n", symbol);
2N/A
2N/A (void) printf(" %s:%*s ", symbuf,
2N/A symwidth - strlen(symbuf), "");
2N/A
2N/A /* print bytes */
2N/A for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr));
2N/A i++) {
2N/A int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
2N/A if (g_flags & DIS_OCTAL)
2N/A (void) printf("%03o ", byte);
2N/A else
2N/A (void) printf("%02x ", byte);
2N/A }
2N/A
2N/A /* trailing spaces for missing bytes */
2N/A for (; i < bytesperline; i++) {
2N/A if (g_flags & DIS_OCTAL)
2N/A (void) printf(" ");
2N/A else
2N/A (void) printf(" ");
2N/A }
2N/A
2N/A /* contents of disassembly */
2N/A (void) printf(" %s", buf);
2N/A
2N/A /* excess bytes that spill over onto subsequent lines */
2N/A for (; i < db.db_nextaddr - addr; i++) {
2N/A int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
2N/A if (i % bytesperline == 0)
2N/A (void) printf("\n %*s ", symwidth, "");
2N/A if (g_flags & DIS_OCTAL)
2N/A (void) printf("%03o ", byte);
2N/A else
2N/A (void) printf("%02x ", byte);
2N/A }
2N/A
2N/A (void) printf("\n");
2N/A
2N/A addr = db.db_nextaddr;
2N/A }
2N/A}
2N/A
2N/A/*
2N/A * libdisasm wrapper around symbol lookup. Invoke the target-specific lookup
2N/A * function, and convert the result using getsymname().
2N/A */
2N/Aint
2N/Ado_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start,
2N/A size_t *symlen)
2N/A{
2N/A dis_buffer_t *db = data;
2N/A const char *symbol;
2N/A off_t offset;
2N/A size_t size;
2N/A
2N/A /*
2N/A * If NULL symbol is returned, getsymname takes care of
2N/A * printing appropriate address in buf instead of symbol.
2N/A */
2N/A symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL);
2N/A
2N/A if (buf != NULL)
2N/A getsymname(addr, symbol, offset, buf, buflen);
2N/A
2N/A if (start != NULL)
2N/A *start = addr - offset;
2N/A if (symlen != NULL)
2N/A *symlen = size;
2N/A
2N/A if (symbol == NULL)
2N/A return (-1);
2N/A
2N/A return (0);
2N/A}
2N/A
2N/A/*
2N/A * libdisasm wrapper around target reading. libdisasm will always read data
2N/A * in order, so update our current offset within the buffer appropriately.
2N/A * We only support reading from within the current object; libdisasm should
2N/A * never ask us to do otherwise.
2N/A */
2N/Aint
2N/Ado_read(void *data, uint64_t addr, void *buf, size_t len)
2N/A{
2N/A dis_buffer_t *db = data;
2N/A size_t offset;
2N/A
2N/A if (addr < db->db_addr || addr >= db->db_addr + db->db_size)
2N/A return (-1);
2N/A
2N/A offset = addr - db->db_addr;
2N/A len = MIN(len, db->db_size - offset);
2N/A
2N/A (void) memcpy(buf, (char *)db->db_data + offset, len);
2N/A
2N/A db->db_nextaddr = addr + len;
2N/A
2N/A return (len);
2N/A}
2N/A
/*
 * Routine to dump raw data in a human-readable format.  Used by the -d and -D
 * options.  We model our output after the xxd(1) program, which gives nicely
 * formatted output, along with an ASCII translation of the result.
 *
 *	addr	address to display for the first byte of 'data'
 *	data	the bytes to dump
 *	datalen	number of bytes in 'data'
 *
 * Output is written to stdout in rows of 16 bytes.  Rows start on 16-byte
 * boundaries, so the first and last rows may be partial; positions that lie
 * outside [addr, addr + datalen) are left blank in both the hex and ASCII
 * columns.
 */
void
dump_data(uint64_t addr, void *data, size_t datalen)
{
	const uint8_t *bytes = data;
	const uint64_t end = addr + datalen;
	/*
	 * Keep the cursor 64 bits wide regardless of the host pointer size,
	 * so that large addresses are neither truncated nor misprinted.
	 */
	uint64_t curaddr = addr & ~(uint64_t)0xf;
	int width;
	int i;

	/*
	 * Determine if the address range given to us fits in 32 bits, in
	 * which case print addresses as 8 hex digits rather than 16.
	 */
	if ((end & 0xffffffff00000000ULL) == 0ULL)
		width = 8;
	else
		width = 16;

	while (curaddr < end) {
		/*
		 * Display leading address
		 */
		(void) printf("%0*llx: ", width, (unsigned long long)curaddr);

		/*
		 * Print out data in two-byte chunks.  If the current address
		 * is before the starting address or after the end of the
		 * section, print blanks as wide as a byte so that the columns
		 * stay aligned.
		 */
		for (i = 0; i < 16; i++) {
			uint64_t pos = curaddr + i;

			if (pos < addr || pos >= end)
				(void) printf("%2s", "");
			else
				(void) printf("%02x", bytes[pos - addr]);

			if (i & 1)
				(void) printf(" ");
		}

		(void) printf(" ");

		/*
		 * Print out the ASCII representation
		 */
		for (i = 0; i < 16; i++) {
			uint64_t pos = curaddr + i;

			if (pos < addr || pos >= end) {
				(void) printf(" ");
			} else {
				uint8_t byte = bytes[pos - addr];

				if (isprint(byte))
					(void) printf("%c", byte);
				else
					(void) printf(".");
			}
		}

		(void) printf("\n");

		curaddr += 16;
	}
}
2N/A
2N/A/*
2N/A * Disassemble a section implicitly specified as part of a file. This function
2N/A * is called for all sections when no other flags are specified. We ignore any
2N/A * data sections, and print out only those sections containing text.
2N/A */
2N/Avoid
2N/Adis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
2N/A{
2N/A dis_handle_t *dhp = data;
2N/A
2N/A /* ignore data sections */
2N/A if (!dis_section_istext(scn))
2N/A return;
2N/A
2N/A if (!g_quiet)
2N/A (void) printf("\nsection %s\n", dis_section_name(scn));
2N/A
2N/A dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn),
2N/A dis_section_size(scn));
2N/A}
2N/A
2N/A/*
2N/A * Structure passed to dis_named_{section,function} which keeps track of both
2N/A * the target and the libdisasm handle.
2N/A */
2N/Atypedef struct callback_arg {
2N/A dis_tgt_t *ca_tgt;
2N/A dis_handle_t *ca_handle;
2N/A} callback_arg_t;
2N/A
2N/A/*
2N/A * Disassemble a section explicitly named with -s, -d, or -D. The 'type'
2N/A * argument contains the type of argument given. Pass the data onto the
2N/A * appropriate helper routine.
2N/A */
2N/Avoid
2N/Adis_named_section(dis_scn_t *scn, int type, void *data)
2N/A{
2N/A callback_arg_t *ca = data;
2N/A
2N/A if (!g_quiet)
2N/A (void) printf("\nsection %s\n", dis_section_name(scn));
2N/A
2N/A switch (type) {
2N/A case DIS_DATA_RELATIVE:
2N/A dump_data(0, dis_section_data(scn), dis_section_size(scn));
2N/A break;
2N/A case DIS_DATA_ABSOLUTE:
2N/A dump_data(dis_section_addr(scn), dis_section_data(scn),
2N/A dis_section_size(scn));
2N/A break;
case DIS_TEXT:
dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn),
dis_section_data(scn), dis_section_size(scn));
break;
}
}
/*
 * Disassemble a function explicitly specified with '-F'.  'data' is the
 * callback_arg_t carrying the target and libdisasm handle.  The 'type'
 * argument is unused.
 */
/* ARGSUSED */
void
dis_named_function(dis_func_t *func, int type, void *data)
{
	callback_arg_t *arg = data;

	dis_data(arg->ca_tgt, arg->ca_handle, dis_function_addr(func),
	    dis_function_data(func), dis_function_size(func));
}
/*
* Disassemble a complete file. First, we determine the type of the file based
* on the ELF machine type, and instantiate a version of the disassembler
* appropriate for the file. We then resolve any named sections or functions
* against the file, and iterate over the results (or all sections if no flags
* were specified).
*/
void
dis_file(const char *filename)
{
dis_tgt_t *tgt, *current;
dis_scnlist_t *sections;
dis_funclist_t *functions;
dis_handle_t *dhp;
GElf_Ehdr ehdr;
/*
* First, initialize the target
*/
if ((tgt = dis_tgt_create(filename)) == NULL)
return;
if (!g_quiet)
(void) printf("disassembly for %s\n\n", filename);
/*
* A given file may contain multiple targets (if it is an archive, for
* example). We iterate over all possible targets if this is the case.
*/
for (current = tgt; current != NULL; current = dis_tgt_next(current)) {
dis_tgt_ehdr(current, &ehdr);
/*
* Eventually, this should probably live within libdisasm, and
* we should be able to disassemble targets from different
* architectures. For now, we only support objects as the
* native machine type.
*/
switch (ehdr.e_machine) {
case EM_SPARC:
if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
warn("invalid E_IDENT field for SPARC object");
return;
}
g_flags |= DIS_SPARC_V8;
break;
case EM_SPARC32PLUS:
{
uint64_t flags = ehdr.e_flags & EF_SPARC_32PLUS_MASK;
if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
warn("invalid E_IDENT field for SPARC object");
return;
}
if (flags != 0 &&
(flags & (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 |
EF_SPARC_SUN_US3)) != EF_SPARC_32PLUS)
g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
else
g_flags |= DIS_SPARC_V9;
break;
}
case EM_SPARCV9:
if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
warn("invalid E_IDENT field for SPARC object");
return;
}
g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
break;
case EM_386:
g_flags |= DIS_X86_SIZE32;
break;
case EM_AMD64:
g_flags |= DIS_X86_SIZE64;
break;
default:
die("%s: unsupported ELF machine 0x%x", filename,
ehdr.e_machine);
}
/*
* If ET_REL (.o), printing immediate symbols is likely to
* result in garbage, as symbol lookups on unrelocated
* immediates find false and useless matches.
*/
if (ehdr.e_type == ET_REL)
g_flags |= DIS_NOIMMSYM;
if (!g_quiet && dis_tgt_member(current) != NULL)
(void) printf("\narchive member %s\n",
dis_tgt_member(current));
/*
* Instantiate a libdisasm handle based on the file type.
*/
if ((dhp = dis_handle_create(g_flags, current, do_lookup,
do_read)) == NULL)
die("%s: failed to initialize disassembler: %s",
filename, dis_strerror(dis_errno()));
if (g_doall) {
/*
* With no arguments, iterate over all sections and
* disassemble only those that contain text.
*/
dis_tgt_section_iter(current, dis_text_section, dhp);
} else {
callback_arg_t ca;
ca.ca_tgt = current;
ca.ca_handle = dhp;
/*
* If sections or functions were explicitly specified,
* resolve those names against the object, and iterate
* over just the resulting data.
*/
sections = dis_namelist_resolve_sections(g_seclist,
current);
functions = dis_namelist_resolve_functions(g_funclist,
current);
dis_scnlist_iter(sections, dis_named_section, &ca);
dis_funclist_iter(functions, dis_named_function, &ca);
dis_scnlist_destroy(sections);
dis_funclist_destroy(functions);
}
dis_handle_destroy(dhp);
}
dis_tgt_destroy(tgt);
}
/*
 * Write the usage summary to stderr and terminate with exit status 2.
 */
void
usage(void)
{
	(void) fputs("usage: dis [-CVoqn] [-d sec] \n", stderr);
	(void) fputs("\t[-D sec] [-F function] [-t sec] file ..\n", stderr);
	exit(2);
}
/*
 * One archive requested with the legacy '-l' option.  main() keeps these on
 * a singly linked list until the files are processed.
 */
typedef struct lib_node {
	char		*path;	/* heap-allocated path of the archive */
	struct lib_node	*next;	/* next entry, or NULL at the tail */
} lib_node_t;
/*
 * Build the archive path for a '-l name' option and push it onto the front
 * of the list 'head'.  Returns the new head of the list.
 *
 * The '-l foo' option historically would attempt to disassemble
 * '$LIBDIR/libfoo.a'.  The $LIBDIR environment variable has never been
 * supported or documented for our linker.  However, until this option is
 * formally EOLed, we have to support it.  When LIBDIR is unset or empty,
 * "/usr/lib" is used.
 */
static lib_node_t *
push_legacy_lib(lib_node_t *head, const char *name)
{
	char *libdir = getenv("LIBDIR");
	lib_node_t *entry;
	size_t pathlen;

	if (libdir == NULL || libdir[0] == '\0')
		libdir = "/usr/lib";

	entry = safe_malloc(sizeof (lib_node_t));

	/* sizeof ("/lib.a") covers the fixed characters and the NUL */
	pathlen = strlen(name) + strlen(libdir) + sizeof ("/lib.a");
	entry->path = safe_malloc(pathlen);
	(void) snprintf(entry->path, pathlen, "%s/lib%s.a", libdir, name);

	entry->next = head;
	return (entry);
}

/*
 * Parse the command line, then disassemble every '-l' archive followed by
 * every file operand.  Returns g_error as the exit status; '-V' prints the
 * version and returns 0 immediately.
 */
int
main(int argc, char **argv)
{
	lib_node_t *pending = NULL;
	lib_node_t *lib;
	lib_node_t *following;
	int opt;

	g_funclist = dis_namelist_create();
	g_seclist = dis_namelist_create();

	while ((opt = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) {
		switch (opt) {
		case 'C':
			g_demangle = 1;
			break;
		case 'n':
			g_numeric = 1;
			break;
		case 'o':
			g_flags |= DIS_OCTAL;
			break;
		case 'q':
			g_quiet = 1;
			break;
		case 'd':
			dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE);
			break;
		case 'D':
			dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE);
			break;
		case 't':
			dis_namelist_add(g_seclist, optarg, DIS_TEXT);
			break;
		case 'F':
			dis_namelist_add(g_funclist, optarg, 0);
			break;
		case 'l':
			pending = push_legacy_lib(pending, optarg);
			break;
		case 'L':
			/*
			 * The '-L' option historically would attempt to read
			 * the .debug section of the target to determine source
			 * line information in order to annotate the output.
			 * No compiler has emitted these sections in many years,
			 * and the option has never done what it purported to
			 * do.  We silently consume the option for
			 * compatibility.
			 */
			break;
		case 'V':
			(void) printf("Solaris disassembler version 1.0\n");
			return (0);
		default:
			usage();
			break;
		}
	}

	argc -= optind;
	argv += optind;

	if (argc == 0 && pending == NULL) {
		warn("no objects specified");
		usage();
	}

	if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist))
		g_doall = 1;

	/*
	 * Archives named with '-l' come first.  Entries were pushed on the
	 * front of the list, so they are visited most recent first.
	 */
	for (lib = pending; lib != NULL; lib = following) {
		following = lib->next;
		dis_file(lib->path);
		free(lib->path);
		free(lib);
	}

	for (; argc > 0; argc--, argv++)
		dis_file(*argv);

	dis_namelist_destroy(g_funclist);
	dis_namelist_destroy(g_seclist);

	return (g_error);
}