output.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Routines for preparing tdata trees for conversion into CTF data, and
* for placing the resulting data into an output file.
*/
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <libelf.h>
#include <gelf.h>
#include <unistd.h>
#include "ctftools.h"
#include "list.h"
#include "memory.h"
#include "traverse.h"
#include "symbol.h"
/*
 * Search key and result slot used when looking up the iidesc that
 * corresponds to an ELF symbol (see matching_iidesc() and find_iidesc()).
 */
typedef struct iidesc_match {
int iim_fuzzy;		/* nonzero: a global iidesc may match a non-global symbol */
iidesc_t *iim_ret;	/* out: matching iidesc, or NULL if none was found */
char *iim_name;		/* name of the symbol being looked up */
char *iim_file;		/* name from the most recent STT_FILE symbol, or NULL */
uchar_t iim_bind;	/* ELF binding (STB_*) of the symbol being looked up */
} iidesc_match_t;
/*
 * Hash iteration callback: `data' is an iidesc_t and `arg' is the
 * iiburst_t being built.  Function and variable descriptors that were
 * never marked IIDESC_F_USED are skipped (return 0).  Every other
 * descriptor has its type flagged as a root and is then traversed using
 * the traversal state stored in the iiburst (return 1).
 */
static int
burst_iitypes(void *data, void *arg)
{
	iidesc_t *desc = data;
	iiburst_t *burst = arg;
	int is_symbol;

	/* Only functions and variables are subject to the "used" filter. */
	is_symbol = (desc->ii_type == II_GFUN || desc->ii_type == II_SFUN ||
	    desc->ii_type == II_GVAR || desc->ii_type == II_SVAR);

	if (is_symbol && (desc->ii_flags & IIDESC_F_USED) == 0)
		return (0);

	desc->ii_dtype->t_flags |= TDESC_F_ISROOT;
	(void) iitraverse_td(desc, burst->iib_tdtd);

	return (1);
}
/*
 * Type traversal callback: record `tdp' in the iiburst's list of types
 * (kept ordered via tdesc_idcmp) and track the largest type id seen.
 * `tdpp' is unused.  Always returns 1.
 */
/*ARGSUSED1*/
static int
save_type_by_id(tdesc_t *tdp, tdesc_t **tdpp, void *private)
{
	iiburst_t *burst = private;

	/*
	 * Updating the maximum on every visited node is wasteful, but
	 * because some types may be suppressed we cannot rely on the
	 * nextid kept in the tdata_t.
	 */
	if (burst->iib_maxtypeid < tdp->t_id)
		burst->iib_maxtypeid = tdp->t_id;

	slist_add(&burst->iib_types, tdp, tdesc_idcmp);

	return (1);
}
/*
 * Callback table handed to tdtrav_init() by iiburst_types().  Entries are
 * positional; the trailing comments name the type kind each slot is for
 * (presumably the table is indexed by the tdesc type code -- confirm
 * against traverse.h).  Every kind is recorded with save_type_by_id()
 * except the first slot, which has no callback, and typedef_unres, which
 * is routed to tdtrav_assert.
 */
static tdtrav_cb_f burst_types_cbs[] = {
NULL,
save_type_by_id, /* intrinsic */
save_type_by_id, /* pointer */
save_type_by_id, /* array */
save_type_by_id, /* function */
save_type_by_id, /* struct */
save_type_by_id, /* union */
save_type_by_id, /* enum */
save_type_by_id, /* forward */
save_type_by_id, /* typedef */
tdtrav_assert, /* typedef_unres */
save_type_by_id, /* volatile */
save_type_by_id, /* const */
save_type_by_id /* restrict */
};
/*
 * Allocate an iiburst_t for `td' with room for `max' entries in both
 * the function and the object arrays.  Both counts start at zero.
 * The caller releases the result with iiburst_free().
 */
static iiburst_t *
iiburst_new(tdata_t *td, int max)
{
	iiburst_t *burst;

	burst = xcalloc(sizeof (iiburst_t));
	burst->iib_td = td;

	burst->iib_nfuncs = 0;
	burst->iib_funcs = xcalloc(sizeof (iidesc_t *) * max);

	burst->iib_nobjts = 0;
	burst->iib_objts = xcalloc(sizeof (iidesc_t *) * max);

	return (burst);
}
/*
 * Walk every iidesc in the tdata's hash, marking the types of the
 * retained descriptors as roots and collecting all reachable types into
 * the iiburst (see burst_iitypes() and save_type_by_id()).
 */
static void
iiburst_types(iiburst_t *iiburst)
{
	tdtrav_data_t tdtd;

	tdtrav_init(&tdtd, &iiburst->iib_td->td_curvgen, NULL, burst_types_cbs,
	    NULL, (void *)iiburst);

	/* burst_iitypes() picks the traversal state up from the iiburst. */
	iiburst->iib_tdtd = &tdtd;

	(void) hash_iter(iiburst->iib_td->td_iihash, burst_iitypes, iiburst);

	/*
	 * tdtd lives on this stack frame; don't leave a dangling pointer
	 * behind in a structure that outlives this call.
	 */
	iiburst->iib_tdtd = NULL;
}
/*
 * Release an iiburst_t: its type list, its function and object arrays,
 * and finally the structure itself.  list_free() is called with no
 * callbacks, and the arrays' elements are not freed individually.
 */
static void
iiburst_free(iiburst_t *iiburst)
{
	list_free(iiburst->iib_types, NULL, NULL);

	free(iiburst->iib_objts);
	free(iiburst->iib_funcs);

	free(iiburst);
}
/*
 * Decide whether `iidesc' describes the ELF symbol captured in `match'.
 *
 * A fuzzy match is a non-global symbol whose name matches a global type
 * description.  This is common when a mapfile is used for a DSO, but it
 * is only accepted when match->iim_fuzzy is set.
 *
 * A weak fuzzy match is when a weak symbol was resolved and matched to
 * a global type description.
 *
 * On a definite match the descriptor is stored in match->iim_ret and -1
 * is returned.  A fuzzy match is stored in match->iim_ret (only if
 * nothing has been stored yet) and 0 is returned so that the search
 * keeps looking for a strong match.  Any non-match returns 0.
 */
static int
matching_iidesc(iidesc_t *iidesc, iidesc_match_t *match)
{
	int is_global, is_static;

	if (streq(iidesc->ii_name, match->iim_name) == 0)
		return (0);

	is_global = (iidesc->ii_type == II_GFUN || iidesc->ii_type == II_GVAR);
	is_static = (iidesc->ii_type == II_SFUN || iidesc->ii_type == II_SVAR);

	if (is_global) {
		if (match->iim_bind == STB_GLOBAL) {
			match->iim_ret = iidesc;
			return (-1);
		}

		/* Remember a fuzzy hit, but keep looking for a strong one. */
		if (match->iim_fuzzy && match->iim_ret == NULL)
			match->iim_ret = iidesc;

		return (0);
	}

	/* Statics must also come from the same source file as the symbol. */
	if (is_static && match->iim_bind == STB_LOCAL &&
	    match->iim_file != NULL &&
	    streq(iidesc->ii_owner, match->iim_file)) {
		match->iim_ret = iidesc;
		return (-1);
	}

	return (0);
}
/*
 * Look up the iidesc in `hash' that corresponds to the symbol described
 * by `match'.  A zeroed template carrying only the symbol name is used
 * as the hash key, and matching_iidesc() does the real comparison.
 * Returns the descriptor left in match->iim_ret, or NULL if none matched.
 */
static iidesc_t *
find_iidesc(hash_t *hash, iidesc_match_t *match)
{
	iidesc_t key;

	bzero(&key, sizeof (key));
	key.ii_name = match->iim_name;

	match->iim_ret = NULL;
	(void) hash_match(hash, &key, (int (*)())matching_iidesc, match);

	return (match->iim_ret);
}
/*
 * If we have a weak symbol, attempt to find the strong symbol it will
 * resolve to.  Note: the code where this actually happens is in
 * sym_process() in cmd/sgs/libld/common/syms.c
 *
 * Finding the matching symbol is unfortunately not trivial.  For a
 * symbol to be a candidate, it must:
 *
 * - have the same type (function, object)
 * - have the same value (address)
 * - have the same size
 * - not be another weak symbol
 * - belong to the same section (checked via section index)
 *
 * If such a candidate is global, then we assume we've found it.  The
 * linker generates the symbol table such that the curfile might be
 * incorrect; this is OK for global symbols, since find_iidesc() doesn't
 * need to check for the source file for the symbol.
 *
 * We might have found a strong local symbol, where the curfile is
 * accurate and matches that of the weak symbol.  We assume this is a
 * reasonable match.
 *
 * If we've got a local symbol with a non-matching curfile, there are
 * two possibilities.  Either this is a completely different symbol, or
 * it's a once-global symbol that was scoped to local via a mapfile.  In
 * the latter case, curfile is likely inaccurate since the linker does
 * not preserve the needed curfile in the order of the symbol table (see
 * the comments about locally scoped symbols in libld's update_osym()).
 * As we can't tell this case from the former one, we use this symbol
 * iff no other matching symbol is found.
 *
 * What we really need here is a SUNW section containing weak<->strong
 * mappings that we can consume.
 *
 * `data'/`nent' describe the symbol table and `strdata' its string
 * table.  Returns 1 with *retsym and *curfilep filled in when a strong
 * symbol was chosen, and 0 when `weak' is not weak or nothing matched
 * (the outputs are then left untouched).
 */
static int
check_for_weak(GElf_Sym *weak, char const *weakfile,
    Elf_Data *data, int nent, Elf_Data *strdata,
    GElf_Sym *retsym, char **curfilep)
{
	char *curfile = NULL;
	char *fallback_file = NULL;
	GElf_Sym fallback_sym;
	int have_fallback = 0;
	int idx;

	if (GELF_ST_BIND(weak->st_info) != STB_WEAK)
		return (0);

	for (idx = 0; idx < nent; idx++) {
		GElf_Sym cand;
		uchar_t ctype;
		int is_local, same_file;

		if (gelf_getsym(data, idx, &cand) == NULL)
			continue;

		/* Track which source file the following symbols belong to. */
		ctype = GELF_ST_TYPE(cand.st_info);
		if (ctype == STT_FILE)
			curfile = (char *)strdata->d_buf + cand.st_name;

		/* Apply the candidate criteria listed above. */
		if (ctype != GELF_ST_TYPE(weak->st_info) ||
		    cand.st_value != weak->st_value ||
		    cand.st_size != weak->st_size ||
		    GELF_ST_BIND(cand.st_info) == STB_WEAK ||
		    cand.st_shndx != weak->st_shndx)
			continue;

		is_local = (GELF_ST_BIND(cand.st_info) == STB_LOCAL);
		same_file = (curfile != NULL && weakfile != NULL &&
		    strcmp(curfile, weakfile) == 0);

		/*
		 * A local symbol from a different (or unknown) file is only
		 * a last resort; the most recent such symbol is remembered.
		 */
		if (is_local && !same_file) {
			have_fallback = 1;
			fallback_file = curfile;
			fallback_sym = cand;
			continue;
		}

		*curfilep = curfile;
		*retsym = cand;
		return (1);
	}

	if (!have_fallback)
		return (0);

	*curfilep = fallback_file;
	*retsym = fallback_sym;
	return (1);
}
/*
 * Once the type description of the strong symbol underlying a weak
 * symbol has been found, duplicate it under the weak symbol's name and
 * file, and add the copy to the td's iidesc hash so that it is handled
 * along with the others later.
 *
 * The copy is typed as a global variable for STT_OBJECT symbols and as
 * a global function for STT_FUNC symbols; for any other symbol type the
 * ii_type produced by iidesc_dup_rename() is left as is.  Returns the
 * new descriptor.
 */
static iidesc_t *
copy_from_strong(tdata_t *td, GElf_Sym *sym, iidesc_t *strongdesc,
    const char *weakname, const char *weakfile)
{
	iidesc_t *weakdesc;
	uchar_t symtype;

	weakdesc = iidesc_dup_rename(strongdesc, weakname, weakfile);

	symtype = GELF_ST_TYPE(sym->st_info);
	if (symtype == STT_OBJECT)
		weakdesc->ii_type = II_GVAR;
	else if (symtype == STT_FUNC)
		weakdesc->ii_type = II_GFUN;

	hash_add(td->td_iihash, weakdesc);

	return (weakdesc);
}
/*
 * Process the symbol table of the output file, associating each symbol
 * with a type description if possible, and sorting them into functions
 * and data, maintaining symbol table order.
 *
 * elf, file	- the object being processed and its name (for messages)
 * td		- type data whose iidesc hash is searched (and extended
 *		  with copies made for weak symbols)
 * fuzzymatch	- nonzero to accept fuzzy matches (see matching_iidesc())
 * dynsym	- nonzero to use .dynsym instead of .symtab
 *
 * Returns a new iiburst_t; the caller releases it with iiburst_free().
 * Unreadable or malformed symbol tables are reported through
 * terminate()/elfterminate(), which are assumed not to return.
 */
static iiburst_t *
sort_iidescs(Elf *elf, const char *file, tdata_t *td, int fuzzymatch,
    int dynsym)
{
	iiburst_t *iiburst;
	Elf_Scn *scn;
	GElf_Shdr shdr;
	Elf_Data *data, *strdata;
	int i, stidx;
	int nent;
	iidesc_match_t match;

	match.iim_fuzzy = fuzzymatch;
	match.iim_file = NULL;

	if ((stidx = findelfsecidx(elf, dynsym ? ".dynsym" : ".symtab")) < 0)
		terminate("%s: Can't open symbol table\n", file);

	if ((scn = elf_getscn(elf, stidx)) == NULL ||
	    (data = elf_getdata(scn, NULL)) == NULL ||
	    gelf_getshdr(scn, &shdr) == NULL)
		elfterminate(file, "Can't read symbol table");

	/* A zero entry size would make the division below trap. */
	if (shdr.sh_entsize == 0)
		terminate("%s: Symbol table has zero entry size\n", file);
	nent = shdr.sh_size / shdr.sh_entsize;

	/* sh_link of a symbol table names its string table. */
	if ((scn = elf_getscn(elf, shdr.sh_link)) == NULL ||
	    (strdata = elf_getdata(scn, NULL)) == NULL)
		elfterminate(file, "Can't read symbol string table");

	iiburst = iiburst_new(td, nent);

	for (i = 0; i < nent; i++) {
		GElf_Sym sym;
		iidesc_t **tolist;
		GElf_Sym ssym;
		iidesc_match_t smatch;
		int *curr;
		iidesc_t *iidesc;

		if (gelf_getsym(data, i, &sym) == NULL)
			elfterminate(file, "Couldn't read symbol %d", i);

		match.iim_name = (char *)strdata->d_buf + sym.st_name;
		match.iim_bind = GELF_ST_BIND(sym.st_info);

		switch (GELF_ST_TYPE(sym.st_info)) {
		case STT_FILE:
			/* Remember the file that owns the symbols to come. */
			match.iim_file = match.iim_name;
			continue;
		case STT_OBJECT:
			tolist = iiburst->iib_objts;
			curr = &iiburst->iib_nobjts;
			break;
		case STT_FUNC:
			tolist = iiburst->iib_funcs;
			curr = &iiburst->iib_nfuncs;
			break;
		default:
			continue;
		}

		if (ignore_symbol(&sym, match.iim_name))
			continue;

		iidesc = find_iidesc(td->td_iihash, &match);

		if (iidesc != NULL) {
			tolist[*curr] = iidesc;
			iidesc->ii_flags |= IIDESC_F_USED;
			(*curr)++;
			continue;
		}

		/*
		 * No description under this name.  Unless this is a weak
		 * symbol with an identifiable strong counterpart, consume
		 * the slot without storing anything in it so that symbol
		 * table order is maintained.
		 */
		if (!check_for_weak(&sym, match.iim_file, data, nent, strdata,
		    &ssym, &smatch.iim_file)) {
			(*curr)++;
			continue;
		}

		smatch.iim_fuzzy = fuzzymatch;
		smatch.iim_name = (char *)strdata->d_buf + ssym.st_name;
		smatch.iim_bind = GELF_ST_BIND(ssym.st_info);

		debug(3, "Weak symbol %s resolved to %s\n", match.iim_name,
		    smatch.iim_name);

		iidesc = find_iidesc(td->td_iihash, &smatch);

		if (iidesc != NULL) {
			tolist[*curr] = copy_from_strong(td, &sym,
			    iidesc, match.iim_name, match.iim_file);
			tolist[*curr]->ii_flags |= IIDESC_F_USED;
		}

		(*curr)++;
	}

	/*
	 * Stabs are generated for every function declared in a given C source
	 * file.  When converting an object file, we may encounter a stab that
	 * has no symbol table entry because the optimizer has decided to omit
	 * that item (for example, an unreferenced static function).  We may
	 * see iidescs that do not have an associated symtab entry, and so
	 * we do not write records for those functions into the CTF data.
	 * All others get marked as a root by this function.
	 */
	iiburst_types(iiburst);

	/*
	 * By not adding some of the functions and/or objects, we may have
	 * caused some types that were referenced solely by those
	 * functions/objects to be suppressed.  This could cause a label,
	 * generated prior to the evisceration, to be incorrect.  Find the
	 * highest type index, and change the label indices to be no higher
	 * than this value.
	 */
	tdata_label_newmax(td, iiburst->iib_maxtypeid);

	return (iiburst);
}
/*
 * Copy the ELF object `src' into `dst', leaving out any existing CTF
 * section, the stabs/DWARF debug sections (unless CTF_KEEP_STABS is set
 * in `flags') and, when CTF_USE_DYNSYM is set, the SHT_SYMTAB section.
 * A new CTF section holding `ctfdata' (`ctfsize' bytes) is appended and
 * linked to the symbol table the CTF data was built against.
 *
 * `srcname' and `dstname' are used in error messages only.  `ctfdata'
 * is attached to the new section by reference, not copied, so it must
 * stay valid until this function returns.  Failures are reported via
 * terminate()/elfterminate(), which are assumed not to return.
 */
static void
write_file(Elf *src, const char *srcname, Elf *dst, const char *dstname,
    caddr_t ctfdata, size_t ctfsize, int flags)
{
	GElf_Ehdr sehdr, dehdr;
	Elf_Scn *sscn, *dscn;
	Elf_Data *sdata, *ddata;
	GElf_Shdr shdr;
	GElf_Word symtab_type;
	int symtab_idx = -1;
	off_t new_offset = 0;
	off_t ctfnameoff = 0;
	int dynsym = (flags & CTF_USE_DYNSYM);
	int keep_stabs = (flags & CTF_KEEP_STABS);
	int *secxlate;
	int srcidx, dstidx;
	int changing = 0;
	int pad;
	int i;

	if (gelf_newehdr(dst, gelf_getclass(src)) == NULL)
		elfterminate(dstname, "Cannot copy ehdr to temp file");
	gelf_getehdr(src, &sehdr);
	memcpy(&dehdr, &sehdr, sizeof (GElf_Ehdr));
	gelf_update_ehdr(dst, &dehdr);

	symtab_type = dynsym ? SHT_DYNSYM : SHT_SYMTAB;

	/*
	 * Neither the existing stab sections nor the SUNW_ctf sections (new or
	 * existing) are SHF_ALLOC'd, so they won't be in areas referenced by
	 * program headers.  As such, we can just blindly copy the program
	 * headers from the existing file to the new file.
	 */
	if (sehdr.e_phnum != 0) {
		(void) elf_flagelf(dst, ELF_C_SET, ELF_F_LAYOUT);
		if (gelf_newphdr(dst, sehdr.e_phnum) == NULL)
			elfterminate(dstname, "Cannot make phdrs in temp file");

		for (i = 0; i < sehdr.e_phnum; i++) {
			GElf_Phdr phdr;

			gelf_getphdr(src, i, &phdr);
			gelf_update_phdr(dst, i, &phdr);
		}
	}

	/*
	 * First pass: build secxlate[], which maps each source section
	 * index to its index in the destination, or to -1 if the section
	 * is being dropped.
	 */
	secxlate = xmalloc(sizeof (int) * sehdr.e_shnum);
	for (srcidx = dstidx = 0; srcidx < sehdr.e_shnum; srcidx++) {
		Elf_Scn *scn = elf_getscn(src, srcidx);
		char *sname;

		gelf_getshdr(scn, &shdr);
		sname = elf_strptr(src, sehdr.e_shstrndx, shdr.sh_name);
		if (sname == NULL) {
			elfterminate(srcname, "Can't find string at %u",
			    shdr.sh_name);
		}

		if (strcmp(sname, CTF_ELF_SCN_NAME) == 0) {
			secxlate[srcidx] = -1;
		} else if (!keep_stabs &&
		    (strncmp(sname, ".stab", 5) == 0 ||
		    strncmp(sname, ".debug", 6) == 0 ||
		    strncmp(sname, ".rel.debug", 10) == 0 ||
		    strncmp(sname, ".rela.debug", 11) == 0)) {
			secxlate[srcidx] = -1;
		} else if (dynsym && shdr.sh_type == SHT_SYMTAB) {
			/*
			 * If we're building CTF against the dynsym,
			 * we'll rip out the symtab so debuggers aren't
			 * confused.
			 */
			secxlate[srcidx] = -1;
		} else {
			secxlate[srcidx] = dstidx++;
		}
	}

	/* Starting point for explicit layout, should it become necessary. */
	new_offset = (off_t)dehdr.e_phoff;

	/* Second pass: copy every retained section (index 0 is skipped). */
	for (srcidx = 1; srcidx < sehdr.e_shnum; srcidx++) {
		char *sname;

		sscn = elf_getscn(src, srcidx);
		gelf_getshdr(sscn, &shdr);

		if (secxlate[srcidx] == -1) {
			changing = 1;
			continue;
		}

		dscn = elf_newscn(dst);

		/*
		 * If this file has program headers, we need to explicitly lay
		 * out sections.  If none of the sections prior to this one have
		 * been removed, then we can just use the existing location.  If
		 * one or more sections have been changed, then we need to
		 * adjust this one to avoid holes.
		 */
		if (changing && sehdr.e_phnum != 0) {
			/*
			 * An sh_addralign of 0 or 1 means the section has no
			 * alignment constraint; skipping the computation also
			 * avoids a modulo by zero.
			 */
			if (shdr.sh_addralign > 1) {
				pad = new_offset % shdr.sh_addralign;

				if (pad)
					new_offset += shdr.sh_addralign - pad;
			}
			shdr.sh_offset = new_offset;
		}

		/* Section numbers have shifted; retarget cross-references. */
		shdr.sh_link = secxlate[shdr.sh_link];

		if (shdr.sh_type == SHT_REL || shdr.sh_type == SHT_RELA)
			shdr.sh_info = secxlate[shdr.sh_info];

		sname = elf_strptr(src, sehdr.e_shstrndx, shdr.sh_name);
		if (sname == NULL) {
			elfterminate(srcname, "Can't find string at %u",
			    shdr.sh_name);
		}
		if ((sdata = elf_getdata(sscn, NULL)) == NULL)
			elfterminate(srcname, "Cannot get sect %s data", sname);
		if ((ddata = elf_newdata(dscn)) == NULL)
			elfterminate(dstname, "Can't make sect %s data", sname);
		bcopy(sdata, ddata, sizeof (Elf_Data));

		if (srcidx == sehdr.e_shstrndx) {
			/*
			 * Append the CTF section's name to a private copy of
			 * the section name string table and remember where it
			 * landed.
			 */
			size_t seclen = strlen(CTF_ELF_SCN_NAME);

			ddata->d_buf = xmalloc(ddata->d_size + shdr.sh_size +
			    seclen + 1);
			bcopy(sdata->d_buf, ddata->d_buf, shdr.sh_size);
			strcpy((caddr_t)ddata->d_buf + shdr.sh_size,
			    CTF_ELF_SCN_NAME);
			ctfnameoff = (off_t)shdr.sh_size;
			shdr.sh_size += seclen + 1;
			ddata->d_size += seclen + 1;

			/* This section grew, so later ones must be moved. */
			if (sehdr.e_phnum != 0)
				changing = 1;
		}

		if (shdr.sh_type == symtab_type && shdr.sh_entsize != 0) {
			int nsym = shdr.sh_size / shdr.sh_entsize;

			symtab_idx = secxlate[srcidx];

			/*
			 * Work on a private copy of the symbols and renumber
			 * each symbol's section index.  Symbols that lived in
			 * a dropped section are pointed at section 1.
			 */
			ddata->d_buf = xmalloc(shdr.sh_size);
			bcopy(sdata->d_buf, ddata->d_buf, shdr.sh_size);

			for (i = 0; i < nsym; i++) {
				GElf_Sym sym;
				int newscn;

				(void) gelf_getsym(ddata, i, &sym);

				/* Reserved indices are not section numbers. */
				if (sym.st_shndx >= SHN_LORESERVE)
					continue;

				if ((newscn = secxlate[sym.st_shndx]) !=
				    sym.st_shndx) {
					sym.st_shndx =
					    (newscn == -1 ? 1 : newscn);

					gelf_update_sym(ddata, i, &sym);
				}
			}
		}

		if (gelf_update_shdr(dscn, &shdr) == NULL)
			elfterminate(dstname, "Cannot update sect %s", sname);

		new_offset = (off_t)shdr.sh_offset;
		if (shdr.sh_type != SHT_NOBITS)
			new_offset += shdr.sh_size;
	}

	if (symtab_idx == -1) {
		terminate("Cannot find %s section\n",
		    dynsym ? "SHT_DYNSYM" : "SHT_SYMTAB");
	}

	/* Add the ctf section */
	dscn = elf_newscn(dst);
	gelf_getshdr(dscn, &shdr);
	shdr.sh_name = ctfnameoff;
	shdr.sh_type = SHT_PROGBITS;
	shdr.sh_size = ctfsize;
	shdr.sh_link = symtab_idx;
	shdr.sh_addralign = 4;
	if (changing && sehdr.e_phnum != 0) {
		pad = new_offset % shdr.sh_addralign;

		if (pad)
			new_offset += shdr.sh_addralign - pad;

		shdr.sh_offset = new_offset;
		new_offset += shdr.sh_size;
	}

	if ((ddata = elf_newdata(dscn)) == NULL) {
		elfterminate(dstname, "Can't make sect %s data",
		    CTF_ELF_SCN_NAME);
	}
	ddata->d_buf = ctfdata;
	ddata->d_size = ctfsize;
	ddata->d_align = shdr.sh_addralign;

	gelf_update_shdr(dscn, &shdr);

	/* update the section header location */
	if (sehdr.e_phnum != 0) {
		size_t align = gelf_fsize(dst, ELF_T_ADDR, 1, EV_CURRENT);
		/* gelf_fsize() yields 0 on failure; don't divide by it. */
		size_t r = (align != 0) ? new_offset % align : 0;

		if (r)
			new_offset += align - r;

		dehdr.e_shoff = new_offset;
	}

	/* commit to disk */
	dehdr.e_shstrndx = secxlate[sehdr.e_shstrndx];
	gelf_update_ehdr(dst, &dehdr);
	if (elf_update(dst, ELF_C_WRITE) < 0)
		elfterminate(dstname, "Cannot finalize temp file");

	free(secxlate);
}
/*
 * Produce the CTF data for `td' as it applies to the symbols of `elf'.
 * `file' is the object's name and is passed on to sort_iidescs().
 * The CTF_FUZZY_MATCH, CTF_USE_DYNSYM and CTF_COMPRESS bits of `flags'
 * select fuzzy symbol matching, use of the dynamic symbol table, and
 * the compression argument of ctf_gen() respectively.  Returns the
 * buffer produced by ctf_gen(), which also sets *lenp.
 */
static caddr_t
make_ctf_data(tdata_t *td, Elf *elf, const char *file, size_t *lenp, int flags)
{
	int fuzzy = flags & CTF_FUZZY_MATCH;
	int use_dynsym = flags & CTF_USE_DYNSYM;
	int compress = flags & CTF_COMPRESS;
	iiburst_t *burst;
	caddr_t ctfbuf;

	burst = sort_iidescs(elf, file, td, fuzzy, use_dynsym);
	ctfbuf = ctf_gen(burst, lenp, compress);
	iiburst_free(burst);

	return (ctfbuf);
}
/*
 * Generate CTF data for `td' against the symbols of the ELF object
 * `curname', and write a copy of that object, with the CTF section
 * added, to `newname'.  `newname' is created or truncated, with the
 * permission bits of `curname' passed to open().  `flags' is handed to
 * make_ctf_data() and write_file().
 *
 * Failures are reported via terminate()/elfterminate(), which are
 * assumed not to return.
 */
void
write_ctf(tdata_t *td, const char *curname, const char *newname, int flags)
{
	struct stat st;
	Elf *elf = NULL;
	Elf *telf = NULL;
	caddr_t data;
	size_t len;
	int fd = -1;
	int tfd = -1;

	(void) elf_version(EV_CURRENT);
	if ((fd = open(curname, O_RDONLY)) < 0 || fstat(fd, &st) < 0)
		terminate("%s: Cannot open for re-reading", curname);
	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
		elfterminate(curname, "Cannot re-read");

	if ((tfd = open(newname, O_WRONLY | O_CREAT | O_TRUNC, st.st_mode)) < 0)
		terminate("Cannot open temp file %s for writing", newname);
	/* This descriptor belongs to the output file, so name that file. */
	if ((telf = elf_begin(tfd, ELF_C_WRITE, NULL)) == NULL)
		elfterminate(newname, "Cannot write");

	data = make_ctf_data(td, elf, curname, &len, flags);
	write_file(elf, curname, telf, newname, data, len, flags);
	/* write_file() has finished with the buffer by the time it returns. */
	free(data);

	elf_end(telf);
	elf_end(elf);
	(void) close(fd);
	(void) close(tfd);
}