fake_shdr.c revision 39773e466ff90ce703d7f52f3267d7e96c09c6f5
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Generate a cache of section header information for an ELF
* object from the information found in its program headers.
*
* Malicious code can remove or corrupt section headers. The
* resulting program will be difficult to analyze, but is still
* runnable. Hence, scribbling on the section headers or removing
* them is an effective form of obfuscation. On the other hand,
* program headers must be accurate or the program will not run.
* Section headers derived from them will necessarily lack information
* found in the originals (particularly for non-allocable sections),
* but will provide essential symbol information. The focus is on
* recovering information that elfdump knows how to display, and that
* might be interesting in a forensic situation.
*
* There are some things we don't attempt to create sections for:
*
* plt, got
* We have no way to determine the length of either of
* these sections from the information available via
* the program headers or dynamic section. The data in
* the PLT is of little use to elfdump. The data in the
* GOT might be somewhat more interesting, especially as
* it pertains to relocations. However, the sizing issue
* remains.
*
* text, data, bss
* Although we could create these, there is little value
* to doing so. elfdump cannot display the arbitrary
* data in these sections, so this would amount to a
* simple repetition of the information already displayed
* in the program headers, with no additional benefit.
*/
#include <machdep.h>
#include <sys/elf_amd64.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <strings.h>
#include <conv.h>
#include <msg.h>
#include <_elfdump.h>
/*
* Common information about the object that is needed by
* all the routines in this module.
*/
typedef struct {
/* NOTE(review): presumably the name of the object being examined -- confirm */
const char *file;
/* NOTE(review): presumably the open file handle for that object -- confirm */
int fd;
} FSTATE;
/*
* These values uniquely identify the sections that we know
* how to recover.
*
* Note: We write the sections to the cache array in this same order.
* It simplifies this code if the dynamic, dynstr, dynsym, and ldynsym
* sections occupy known slots in the cache array. Other sections reference
* them by index, and if they are at a known spot, there is no need
* for a fixup pass. Putting them in positions [1-4] solves this.
*
* The order they are in was chosen such that if any one of them exists,
* all of the ones before it must also exist. This means that if the
* desired section exists, it will end up in the desired index in the
* cache array.
*
* The order of the other sections is arbitrary. I've arranged them
* in roughly related groups.
*/
typedef enum {
	/* Marks an entry that is unused or has been discarded */
	SINFO_T_NULL		= 0,

	/*
	 * Fixed slots [1-4]. Other sections refer to these by index,
	 * so their positions in the cache array must not move.
	 */
	SINFO_T_DYN		= 1,
	SINFO_T_DYNSTR		= 2,
	SINFO_T_DYNSYM		= 3,
	SINFO_T_LDYNSYM		= 4,

	/*
	 * Everything below is in arbitrary order, kept in roughly
	 * related groups.
	 */

	/* Hash and symbol auxiliary tables */
	SINFO_T_HASH		= 5,
	SINFO_T_SYMINFO		= 6,
	SINFO_T_SYMSORT		= 7,
	SINFO_T_TLSSORT		= 8,

	/* Symbol versioning */
	SINFO_T_VERNEED		= 9,
	SINFO_T_VERDEF		= 10,
	SINFO_T_VERSYM		= 11,

	/* Interpreter, capabilities, unwind, move */
	SINFO_T_INTERP		= 12,
	SINFO_T_CAP		= 13,
	SINFO_T_UNWIND		= 14,
	SINFO_T_MOVE		= 15,

	/* Relocations */
	SINFO_T_REL		= 16,
	SINFO_T_RELA		= 17,

	/* Pre-init, init, and fini arrays */
	SINFO_T_PREINITARR	= 18,
	SINFO_T_INITARR		= 19,
	SINFO_T_FINIARR		= 20,

	/* Notes */
	SINFO_T_NOTE		= 21
} SINFO_TYPE;
/*
* Table of per-section constant data used to set up the section
* header cache and the various sub-parts it references. Indexed by
* SINFO_T value.
*
* note: The sh_flags value should be either SHF_ALLOC, or 0.
* get_data() sets SHF_WRITE if the program header containing the
* section is writable. The other flags require information that
* the program headers don't contain (i.e. SHF_STRINGS, etc) so
* we don't set them.
*/
typedef struct {
/* NOTE(review): presumably the name of the section this entry describes -- confirm */
const char *name;
} SINFO_DATA;
/* SINFO_T_NULL */
{ 0 },
/* SINFO_T_DYN */
/* SINFO_T_DYNSTR */
1, 0, ELF_T_BYTE },
/* SINFO_T_DYNSYM */
/* SINFO_T_LDYNSYM */
/* SINFO_T_HASH */
/* SINFO_T_SYMINFO */
/* SINFO_T_SYMSORT */
/* SINFO_T_TLSSORT */
/* SINFO_T_VERNEED */
/* SINFO_T_VERDEF */
/* SINFO_T_VERSYM */
/* SINFO_T_INTERP */
1, 0, ELF_T_BYTE },
/* SINFO_T_CAP */
/* SINFO_T_UNWIND */
sizeof (Addr), 0, ELF_T_BYTE },
/* SINFO_T_MOVE */
/* SINFO_T_REL */
/* SINFO_T_RELA */
/* SINFO_T_PREINITARR */
/* SINFO_T_INITARR */
/* SINFO_T_FINIARR */
/* SINFO_T_NOTE */
};
/*
* As we read program headers and dynamic elements, we build up
* the data for our fake section headers in variables of the
* SINFO type. SINFO is used to track the sections that can only
* appear a fixed number of times (usually once).
*
* SINFO_LISTELT is used for sections that can occur an arbitrary
* number of times. They are kept in a doubly linked circular
* list.
*/
typedef struct {
/* vaddr is 0. Used by program headers */
} SINFO;
typedef struct _sinfo_listelt {
struct _sinfo_listelt *next;
struct _sinfo_listelt *prev;
/*
* Free dynamic memory used by SINFO structures.
*
* entry:
* sinfo - Address of first SINFO structure to free
* n - # of structures to clear
*
* exit:
* For each SINFO struct, the section header, data descriptor,
* and data buffer are freed if non-NULL. The relevant
* fields are set to NULL, and the type is set to SINFO_T_NULL.
*/
static void
{
for (; n-- > 0; sinfo++) {
}
}
}
}
/*
* Allocate a new SINFO_LISTELT and put it at the end of the
* doubly linked list anchored by the given list root node.
*
* On success, a new node has been put at the end of the circular
* doubly linked list, and a pointer to the SINFO sub-structure is
* returned. On failure, an error is printed, and NULL is returned.
*/
static SINFO *
{
return (0);
}
}
/*
* Release the memory used by the given list, restoring it to
* an empty list.
*/
static void
{
}
/*
* Given a virtual address and desired size of the data to be found
* at that address, look through the program headers for the PT_LOAD
* segment that contains it and return the offset within the ELF file
* at which it resides.
*
* entry:
* fstate - Object state
* addr - virtual address to be translated
* size - Size of the data to be found at that address, in bytes
* zero_bytes - NULL, or address to receive the number of data
* bytes at the end of the data that are not contained
* in the file, and which must be zero filled by the caller.
* If zero_bytes is NULL, the file must contain all of the
* desired data. If zero_bytes is not NULL, then the program
* header must reserve the space for all of the data (p_memsz)
* but it is acceptable for only part of the data to be in
* the file (p_filesz). *zero_bytes is set to the difference
* in size, and is the number of bytes the caller must
* set to 0 rather than reading from the file.
* phdr_ret - NULL, or address of variable to receive pointer
* to program header that contains offset.
* exit:
* On success: If zero_bytes is non-NULL, it is updated. If phdr_ret
* is non-NULL, it is updated. The file offset is returned.
*
* On failure, 0 is returned. Since any ELF file we can understand
* must start with an ELF magic number, 0 cannot be a valid file
* offset for a virtual address, and is therefore unambiguous as
* a failure indication.
*/
static Off
{
continue;
/*
* Subtract segment virtual address, leaving the
* offset relative to the segment (not the file).
*/
/*
* Is there enough data in the file to satisfy
* the request? If zero_bytes is NULL, it must
* all be in the file. Otherwise it can be
* zero filled.
*/
if (zero_bytes == NULL) {
if (size > avail_file)
continue;
} else {
(size - avail_file) : 0;
}
/* Add segment file offset, giving overall offset */
}
}
/* If we get here, the mapping failed */
return (0);
}
/*
* This routine is the same thing as map_addr_to_offset(), except that
* it goes the other way, mapping from offset to virtual address.
*
* The comments for map_addr_to_offset() are applicable if you
* reverse offset and address.
*/
static Addr
{
continue;
/*
* Subtract segment offset, leaving the
* offset relative to the segment (not the file).
*/
/*
* Is there enough data in the file to satisfy
* the request? If zero_bytes is NULL, it must
* all be in the file. Otherwise it can be
* zero filled.
*/
if (zero_bytes == NULL) {
if (size > avail_file)
continue;
} else {
(size - avail_file) : 0;
}
/* Add segment virtual address, giving overall addr */
}
}
/* If we get here, the mapping failed */
return (0);
}
/*
* Use elf_xlatetom() to convert the bytes in buf from their
* in-file representation to their in-memory representation.
*
* Returns True(1) for success. On failure, an error message is printed
* and False(0) is returned.
*/
static int
{
return (0);
}
return (1);
}
/*
* Read nbytes of data into buf, starting at the specified offset
* within the ELF file.
*
* entry:
* fstate - Object state
* offset - Offset within the file at which desired data resides.
* buf - Buffer to receive the data
* nbyte - # of bytes to read into buf
* xlate_type - An ELF xlate type, specifying the type of data
* being input. If xlate_type is ELF_T_BYTE, xlate is not
* done. Otherwise, xlate_data() is called to convert the
* data into its in-memory representation.
* exit:
* On success, the data has been written into buf, xlate_data()
* called on it if required, and True(1) is returned. Otherwise
* False(0) is returned.
*
* note:
* This routine does not move the file pointer.
*/
static int
{
return (0);
}
if (xlate_type != ELF_T_BYTE)
return (1);
}
/*
* Read the nbucket and nchain values from the start of the hash
* table found at the given virtual address in the mapped ELF object.
*
* On success, *nbucket, and *nchain have been filled in with their
* values, *total contains the number of elements in the hash table,
* and this routine returns True (1).
*
* On failure, False (0) is returned.
*/
static int
{
if (offset == 0)
return (0);
return (0);
return (1);
}
/*
* Read a Verdef structure at the specified file offset and return
* its vd_cnt, vd_aux, and vd_next fields.
*/
static int
{
ELF_T_BYTE) == 0)
return (0);
/* xlate vd_cnt */
ELF_T_HALF) == 0)
return (0);
/*
* xlate vd_aux and vd_next. These items are adjacent and are
* both Words, so they can be handled in a single operation.
*/
return (0);
return (1);
}
/*
* Read a Verdaux structure at the specified file offset and return
* its vda_next field.
*/
static int
{
ELF_T_BYTE) == 0)
return (0);
/* xlate vda_next */
ELF_T_WORD) == 0)
return (0);
return (1);
}
/*
* Read a Verneed structure at the specified file offset and return
* its vn_cnt, vn_aux, and vn_next fields.
*/
static int
{
ELF_T_BYTE) == 0)
return (0);
/* xlate vn_cnt */
ELF_T_HALF) == 0)
return (0);
/*
* xlate vn_aux and vn_next. These items are adjacent and are
* both Words, so they can be handled in a single operation.
*/
return (0);
return (1);
}
/*
* Read a Vernaux structure at the specified file offset and return
* its vna_next field.
*/
static int
{
ELF_T_BYTE) == 0)
return (0);
/* xlate vna_next */
ELF_T_WORD) == 0)
return (0);
return (1);
}
/*
* Compute the size of Verdef and Verneed sections. Both of these
* sections are made up of interleaved main nodes (Verdef and Verneed)
* and auxiliary blocks (Verdaux and Vernaux). These nodes refer to
* each other by relative offsets. The linker has a lot of flexibility
* in how it lays out these items, and we cannot assume a standard
* layout. To determine the size of the section, we must read each
* main node and compute the high water mark of the memory it and its
* auxiliary structs access.
*
* Although Verdef/Verdaux and Verneed/Vernaux are different types,
* their logical organization is the same. Each main block has
* a cnt field that tells how many auxiliary blocks it has, an
* aux field that gives the offset of the first auxiliary block, and
* an offset to the next main block. Each auxiliary block contains
* an offset to the next auxiliary block. By breaking the type specific
* code into separate sub-functions, we can process both Verdef and
* Verneed sections from a single routine.
*
* entry:
* fstate - Object state
* sec - Section to be processed (SINFO_T_VERDEF or SINFO_T_VERNEED).
*
* exit:
* On success, sec->size is set to the section size in bytes, and
* True (1) is returned. On failure, False (0) is returned.
*/
static int
{
/*
* Set up the function pointers to the type-specific code
* for fetching data from the main and auxiliary blocks.
*/
} else { /* SINFO_T_VERNEED */
}
/*
* Map starting address to file offset. Save the starting offset
* in the SINFO size field. Once we have the high water offset, we
* can subtract this from it to get the size.
*
* Note: The size argument set here is a lower bound --- the
* size of the main blocks without any auxiliary ones. It's
* the best we can do until the size has been determined for real.
*/
if (offset == 0)
return (0);
/* Does this move the high water mark up? */
return (0);
/*
* If there are auxiliary structures referenced,
* check their position to see if it pushes
* the high water mark.
*/
return (0);
}
}
return (1);
}
/*
* Allocate a fake section header, data descriptor, and data
* buffer for the given section, fill them in, and read the
* associated data into the buffer.
*
* entry:
* fstate - Object state
* sec - Section information
*
* exit:
* On success, the actions described above are complete, and
* True (1) is returned.
*
* On failure, an error is reported, all resources used by sec
* are released, and sec->type is set to SINFO_T_NULL, effectively
* eliminating its contents from any further use. False (0) is
* returned.
*/
static int
{
/*
* If this is a NULL section, or if we've already processed
* this item, then we are already done.
*/
return (1);
return (0);
}
/*
* Fill in fake section header
*
* sh_name should be the offset of the name in the shstrtab
* section referenced by the ELF header. There is no
* value to elfdump in creating shstrtab, so we set
* sh_name to 0, knowing that elfdump doesn't look at it.
*/
/*
* Non-allocable section: Pass the addr (which is probably
* 0) and offset through without inspection.
*/
zero_bytes = 0;
/*
* Allocable section with a 0 vaddr. Figure out the
* real address by mapping the offset to it using the
* program headers.
*/
} else {
/*
* Allocable section with non-0 vaddr. Use the vaddr
* to derive the offset.
*/
}
return (0);
}
/*
* If the program header has its write flags set, then set
* the section write flag.
*/
/*
* Some sections define special meanings for sh_link and sh_info.
*/
case SHT_DYNAMIC:
break;
case SHT_DYNSYM:
break;
case SHT_SUNW_LDYNSYM:
/*
* ldynsym is all local symbols, so the index of the
* first global is equivalent to the number of symbols.
*/
break;
case SHT_HASH:
case SHT_SUNW_move:
case SHT_REL:
case SHT_RELA:
case SHT_SUNW_versym:
break;
case SHT_SUNW_verdef:
case SHT_SUNW_verneed:
break;
case SHT_SUNW_syminfo:
break;
case SHT_SUNW_symsort:
case SHT_SUNW_tlssort:
break;
}
/* Fill in fake Elf_Data descriptor */
return (1);
}
return (0);
}
if ((read_bytes > 0) &&
read_bytes, ELF_T_BYTE) == 0)) {
return (0);
}
if (zero_bytes > 0)
return (0);
}
return (1);
}
/*
* Generate a section header cache made up of information derived
* from the program headers.
*
* entry:
* file - Name of object
* fd - Open file handle for object
* elf - ELF descriptor
* ehdr - Elf header
* cache, shnum - Addresses of variables to receive resulting
* cache and number of sections.
*
* exit:
* On success, *cache and *shnum are set, and True (1) is returned.
* On failure, False (0) is returned.
*
* note:
* The cache returned by this routine must be freed using
* fake_shdr_cache_free(), and not by a direct call to free().
* Otherwise, memory will leak.
*/
int
{
/*
* The C language guarantees that a structure of homogeneous
* items will receive exactly the same layout as a plain array
* of the same type. Hence, this structure gives us by-name or
* by-index access to the various section info descriptors we
* maintain.
*
* We use this for sections where
* - Only one instance is allowed
* - We need to be able to access them easily by
* name (for instance, when mining the .dynamic
* section for information to build them up).
*
* NOTE: These fields must be in the same order as the
* SINFO_T_ type codes that correspond to them. Otherwise,
* they will end up in the wrong order in the cache array,
* and sections that refer to them by index will be wrong.
*/
struct {
/* Note: No entry is needed for SINFO_T_NULL */
} sec;
/*
* Doubly linked circular list, used to track sections
* where multiple sections of a given type can exist.
* seclist is the root of the list. Its sinfo field is not
* used --- it serves to anchor the root of the list, allowing
* rapid access to the first and last element in the list.
*/
return (0);
}
return (0);
}
/*
* Go through the program headers and look for information
* we can use to synthesize section headers. By far the most
* valuable thing is a dynamic section, the contents of
* which point at all sections used by ld.so.1.
*/
/*
* A program header with no file size does
* not have a backing section.
*/
continue;
default:
/* Header we can't use. Move on to next one */
continue;
case PT_DYNAMIC:
break;
case PT_INTERP:
break;
case PT_NOTE:
NULL)
continue;
break;
case PT_SUNW_UNWIND:
break;
case PT_SUNWCAP:
break;
}
/*
* Record the header in the SINFO struct set up by the
* switch statement above.
*/
}
/*
* If we found a dynamic section, look through it and
* gather information about the sections it references.
*/
case DT_HASH:
break;
case DT_STRTAB:
break;
case DT_SYMTAB:
break;
case DT_RELA:
break;
case DT_RELASZ:
break;
case DT_STRSZ:
break;
case DT_REL:
break;
case DT_RELSZ:
break;
case DT_INIT_ARRAY:
break;
case DT_INIT_ARRAYSZ:
break;
case DT_FINI_ARRAY:
break;
case DT_FINI_ARRAYSZ:
break;
case DT_PREINIT_ARRAY:
break;
case DT_PREINIT_ARRAYSZ:
break;
case DT_SUNW_SYMTAB:
break;
case DT_SUNW_SYMSZ:
break;
case DT_SUNW_SYMSORT:
break;
case DT_SUNW_SYMSORTSZ:
break;
case DT_SUNW_TLSSORT:
break;
case DT_SUNW_TLSSORTSZ:
break;
case DT_MOVETAB:
break;
case DT_MOVESZ:
break;
case DT_SYMINFO:
break;
case DT_SYMINSZ:
break;
case DT_VERSYM:
break;
case DT_VERDEF:
break;
case DT_VERDEFNUM:
break;
case DT_VERNEED:
break;
case DT_VERNEEDNUM:
break;
}
}
}
/*
* Different sections depend on each other, and are meaningless
* without them. For instance, even if a .dynsym exists,
* no use can be made of it without a dynstr. These relationships
* fan out: Disqualifying the .dynsym will disqualify the hash
* section, and so forth.
*
* Disqualify sections that don't have the necessary prerequisites.
*/
/* Things that need the dynamic string table */
}
/*
* The length of the hash section is encoded in its first two
* elements (nbucket, and nchain). The length of the dynsym,
* ldynsym, and versym are not given in the dynamic section,
* but are known to be the same as nchain.
*
* If we don't have a hash table, or cannot read nbucket and
* nchain, we have to invalidate all of these.
*/
} else {
/* Use these counts to set sizes for related sections */
/*
* The ldynsym size received the DT_SUNW_SYMSZ
* value, which is the combined size of .dynsym
* and .ldynsym. Now that we have the dynsym size,
* use it to lower the ldynsym size to its real size.
*/
}
}
/*
* If the hash table is not present, or if the call to
* hash_size() failed, then discard the sections that
* need it to determine their length.
*/
}
/*
* The runtime linker does not receive size information for
* Verdef and Verneed sections. We have to read their data
* in pieces and calculate it.
*/
/* Discard any section with a zero length */
/* Things that need the dynamic symbol table */
}
/* Things that need the dynamic local symbol table */
}
/*
* Look through the results and fetch the data for any sections
* we have found. At the same time, count the number.
*/
num_sinfo = num_list_sinfo = 0;
num_sinfo++;
}
}
/*
* Allocate the cache array and fill it in. The cache array
* ends up taking all the dynamic memory we've allocated
* to build up sec and seclist, so on success, we have nothing
* left to clean up. If we can't allocate the cache array
* though, we have to free up everything else.
*/
return (0);
}
*_cache = cache_init;
_cache++;
ndx = 1;
_cache++;
num_sinfo--;
}
}
_cache++;
}
}
return (1);
}
/*
* Release all the memory referenced by a cache array allocated
* by fake_shdr_cache().
*/
void
{
}
}
}