gprof.c revision e0ddff35438f277370a2eae5c6718cd5ba0fe3ab
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sysexits.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include "gprof.h"
#include "profile.h"
char *whoami = "gprof";
/*
 * Function names that get -E excluded by default.
 */
static char *defaultEs[] = { "mcount", "__mcleanup" };
#ifdef DEBUG
/*
 * Printable names used only in DEBUG builds.  Slot 0 is the placeholder
 * for an invalid object; MAX_OBJTYPES equals the last index of the table.
 * (Presumably indexed by profile object type -- confirm against the
 * debug printf that consumes it.)
 */
#define	MAX_OBJTYPES	3
static char *objname[] = {
	"<invalid object>", "PROF_BUFFER_T", "PROF_CALLGRAPH_T", "PROF_MODULES_T"
};
#endif /* DEBUG */
/*
 * Terminate the program with a successful exit status.
 *
 * main() marks the code following its done() call as NOTREACHED, so this
 * function must not return to its caller; it exits the process with
 * EX_OK instead of falling off the end of an empty body.
 */
void
done(void)
{
	exit(EX_OK);
}
static pctype
{
if (a > b)
return (a);
return (b);
}
static pctype
{
if (a < b)
return (a);
return (b);
}
/*
* calculate scaled entry point addresses (to save time in asgnsamples),
* and possibly push the scaled entry points over the entry mask,
* if it turns out that the entry point is in one bucket and the code
* for a routine is in the next bucket.
*
*/
static void
alignentries(void)
{
#ifdef DEBUG
#endif /* DEBUG */
/* for old-style gmon.out, nameslist is only in modules.nl */
#ifdef DEBUG
if (bucket_of_entry < bucket_of_code) {
if (debug & SAMPLEDEBUG) {
(void) printf(
"[alignentries] pushing svalue 0x%llx "
}
}
#endif /* DEBUG */
}
}
/*
* old-style gmon.out
* ------------------
*
* Assign samples to the procedures to which they belong.
*
* There are three cases as to where pcl and pch can be
* with respect to the routine entry addresses svalue0 and svalue1
* as shown in the following diagram. overlap computes the
* distance between the arrows, the fraction of the sample
* that is to be credited to the routine which starts at svalue0.
*
* svalue0 svalue1
* | |
* v v
*
* +-----------------------------------------------+
* | |
* | ->| |<- ->| |<- ->| |<- |
* | | | | | |
* +---------+ +---------+ +---------+
*
* ^ ^ ^ ^ ^ ^
* | | | | | |
* pcl pch pcl pch pcl pch
*
* For the vax we assert that samples will never fall in the first
* two bytes of any routine, since that is the entry mask,
* thus we call alignentries() to adjust the entry points if
* the entry mask falls in one bucket but the code for the routine
* doesn't start until the next bucket. In conjunction with the
* alignment of routine addresses, this should allow us to have
* only one sample for every four bytes of text space and never
* have any overlap (the two end cases, above).
*/
static void
asgnsamples(void)
{
sztype i, j;
double time;
extern mod_info_t modules;
/* read samples and assign to namelist symbols */
alignentries();
for (i = 0, j = 1; i < nsamples; i++) {
if (ccnt == 0)
continue;
#ifdef DEBUG
if (debug & SAMPLEDEBUG) {
(void) printf(
"[asgnsamples] pcl 0x%llx pch 0x%llx ccnt %d\n",
}
#endif /* DEBUG */
for (j = (j ? j - 1 : 0); j < nname; j++) {
/*
* if high end of tick is below entry address,
* go for next tick.
*/
break;
/*
* if low end of tick into next routine,
* go for next routine.
*/
continue;
if (overlap != 0) {
#ifdef DEBUG
if (debug & SAMPLEDEBUG) {
(void) printf("[asgnsamples] "
"(0x%llx->0x%llx-0x%llx) %s gets "
"%f ticks %lld overlap\n",
}
#endif /* DEBUG */
}
}
}
#ifdef DEBUG
if (debug & SAMPLEDEBUG) {
}
#endif /* DEBUG */
}
static void
unsigned long ncallees)
{
mod_info_t *mi;
/*
* Write the callgraph header
*/
}
/* CONSTCOND */
if (CGRAPH_FILLER)
/* Current offset inside the callgraph object */
continue;
/* If this is the last callee, set next_to to 0 */
callee_id++;
else {
}
/*
* Dump this callee's raw arc information with all
* its callers
*/
caller_id = 1;
/*
* If no more callers for this callee, set
* next_from to 0
*/
else {
}
}
/* CONSTCOND */
if (FUNC_FILLER)
caller_id++;
}
} /* for nlp... */
} /* for mi... */
}
/*
* To save all pc-hits in all the gmon.out's is infeasible, as this
* may become quite huge even with a small number of files to sum.
* Instead, we'll dump *fictitious hits* to correct functions
* by scanning module namelists. Again, since this is summing
* pc-hits, we may have to dump the pcsamples out in chunks if the
* number of pc-hits is high.
*/
static void
{
}
/*
* Set up *fictitious* hits (to function entry) buffer
*/
for (i = 0; i < nelem; i++)
p[i] = hitpc;
}
}
if (ntowrite) {
}
}
free(p);
}
static void
unsigned long *ncallees)
{
mod_info_t *mi;
}
/* CONSTCOND */
if (BUF_FILLER)
*tarcs = 0;
*ncallees = 0;
}
(*ncallees)++;
}
}
}
}
static void
{
char *pbuf, *p;
mod_info_t *mi;
/* Allocate for path strings buffer */
}
/* Dump out PROF_MODULE_T info for all non-aout modules */
}
/* CONSTCOND */
if (MODLIST_FILLER)
/*
* Initialize offsets for ProfModule elements.
*/
else
}
/* CONSTCOND */
if (MOD_FILLER)
p += namelen + 1;
/* Note that offset to every path str need not be aligned */
off_nxt += PROFMOD_SZ;
}
/* Write out the module path strings */
if (pbuf_sz) {
}
}
}
/*
* If we have inactive modules, their current load addresses may overlap
* with those of active ones, so we have to assign fictitious,
* non-overlapping addresses to all modules before we dump them.
*/
static void
{
unsigned int n_inactive = 0;
mod_info_t *mi;
/* Pick the lowest load address among modules */
*pathsz = 0;
n_inactive++;
/*
* Return total path size of non-aout modules only
*/
}
/*
* All module info is in fine shape already if there are no
* inactive modules
*/
if (n_inactive == 0)
return;
/*
* Assign fictitious load addresses to all (non-aout) modules so
* that sum info can be dumped out.
*/
/* just to give an appearance of reality */
} else {
/*
* can't use this lbase & lend pair, as it
* overlaps with aout's addresses
*/
}
}
}
static void
{
}
/* CONSTCOND */
if (HDR_FILLER)
}
static void
dumpsum_ostyle(char *sumfile)
{
}
/*
* dump the header; use the last header read in
*/
if (Bflag) {
}
} else {
}
}
/*
* dump the samples
*/
nsamples) {
}
/*
* dump the normalized raw arc information. For old-style dumping,
* the only namelist is in modules.nl
*/
if (Bflag) {
}
} else {
}
}
#ifdef DEBUG
if (debug & SAMPLEDEBUG) {
(void) printf(
"[dumpsum_ostyle] frompc 0x%llx selfpc "
}
#endif /* DEBUG */
}
}
}
/*
* dump out the gmon.sum file
*/
static void
{
unsigned long total_arcs; /* total number of arcs in all */
unsigned long ncallees; /* no. of callees with parents */
if (old_style) {
return;
}
}
/*
* Dump the new-style gprof header. Even if one of the original
* profiled-files was of an older version, the summed file is of
* current version only.
*/
/*
* Fix up load-maps and dump out modules info
*
* Fix up module load maps so inactive modules get *some* address
* (and btw, could you get the total size of non-aout module path
* strings please ?)
*/
/*
* Dump out the summ'd pcsamples
*
* For dumping call graph information later, we need certain
* statistics (like total arcs, number of callers for each node);
* collect these also while we are at it.
*/
/*
* Dump out the summ'd call graph information
*/
}
static void
{
/*
* if count == 0 this is a null arc and
* we don't need to tally it.
*/
return;
/*
* Lookup the caller and callee pcs in namelists of
* appropriate modules
*/
if (!Dflag)
else {
if (first_file)
else {
}
}
#ifdef DEBUG
if (debug & TALLYDEBUG) {
(void) printf("[tally] arc from %s to %s traversed "
}
#endif /* DEBUG */
}
}
/*
* Look up a module's base address in a sorted list of pc-hits. Unlike
* nllookup(), this deals with misses by mapping them to the next *higher*
* pc-hit. This is so that we get into the module's first pc-hit right away,
* even if the module's entry-point (load_base) itself is not a hit.
*/
static Address *
{
return (pclist);
return (NULL);
else
}
/* must never reach here! */
return (NULL);
}
static void
{
#ifdef DEBUG
size_t n_hits_in_module = 0;
#endif /* DEBUG */
/* Locate the first pc-hit for this module */
#ifdef DEBUG
if (debug & PCSMPLDEBUG) {
(void) printf("[assign_pcsamples] no pc-hits in\n");
(void) printf(
}
#endif /* DEBUG */
return; /* no pc-hits in this module */
}
/* Assign all pc-hits in this module to appropriate functions */
/* Update the corresponding function's time */
/*
* Collect all pc-hits in this function. Each
* pc-hit counts as 1 tick.
*/
func_nticks = 0;
func_nticks++;
pcptr++;
}
if (func_nticks == 0)
pcptr++;
else {
totime += func_nticks;
}
#ifdef DEBUG
#endif /* DEBUG */
} else {
/*
* pc sample could not be assigned to function;
* probably in a PLT
*/
pcptr++;
}
}
#ifdef DEBUG
if (debug & PCSMPLDEBUG) {
(void) printf(
"[assign_pcsamples] %ld hits in\n", n_hits_in_module);
}
#endif /* DEBUG */
}
int
{
return (1);
return (-1);
return (0);
}
static void
{
mod_info_t *mi;
caddr_t p;
#ifdef DEBUG
if (debug & PCSMPLDEBUG) {
(void) printf(
"[process_pcsamples] number of pcsamples = %lld\n",
}
#endif /* DEBUG */
/* buffer with no pc samples ? */
return;
/*
* If we're processing pcsamples of a profile sum, we could have
* more than PROF_BUFFER_SIZE number of samples. In such a case,
* we must read the pcsamples in chunks.
*/
/* Allocate for the pcsample chunk */
if (pc_samples == NULL) {
whoami, chunk_size);
}
/* Copy the current set of pcsamples */
nelem_read = 0;
while (nelem_read < nelem_to_read) {
(void) memcpy((void *) pc_samples, p,
chunk_size * sizeof (Address));
/* Sort the pc samples */
/*
* Assign pcsamples to functions in the currently active
* module list
*/
continue;
}
p += (chunk_size * sizeof (Address));
nelem_read += chunk_size;
}
/* Update total number of pcsamples read so far */
}
static mod_info_t *
{
mod_info_t *mi;
continue;
return (mi);
}
return (NULL);
}
static void
{
/*
* Note that *callee_off* increment in the for loop below
* uses *calleep* and *calleep* doesn't get set until the for loop
* is entered. We don't expect the increment to be executed before
* the loop body is executed at least once, so this should be ok.
*/
/* LINTED: pointer cast */
/*
* We could choose either to sort the {caller, callee} pairs or to
* search the modules list for each pc; since the modules list is
* usually very small, we'll choose the latter.
*/
/*
* If we cannot identify a callee with a module, there's
* no use worrying about who called it.
*/
#ifdef DEBUG
if (debug & CGRAPHDEBUG) {
(void) printf(
"[process_cgraph] callee %#llx missed\n",
}
#endif /* DEBUG */
continue;
} else
/* LINTED: pointer cast */
NULL) {
#ifdef DEBUG
if (debug & CGRAPHDEBUG) {
(void) printf(
"[process_cgraph] caller %#llx "
}
#endif /* DEBUG */
continue;
}
#ifdef DEBUG
if (debug & CGRAPHDEBUG) {
(void) printf(
"[process_cgraph] arc <%#llx, %#llx, "
}
#endif /* DEBUG */
}
}
#ifdef DEBUG
puts("\n");
#endif /* DEBUG */
}
/*
* Two modules overlap each other if they don't lie completely *outside*
* each other.
*/
static bool
{
/* case 1: new module lies completely *before* the old one */
return (FALSE);
/* case 2: new module lies completely *after* the old one */
return (FALSE);
/* probably a dlopen: the modules overlap each other */
return (TRUE);
}
static bool
{
}
return (TRUE);
else
return (FALSE);
}
static void
{
char *so_path;
bool more_modules = TRUE;
struct stat so_statbuf;
#ifdef DEBUG
if (debug & MODULEDEBUG) {
(void) printf("[process_modules] module obj version %u\n",
}
#endif /* DEBUG */
/* Check version of module type object */
}
/*
* Scan the PROF_MODULES_T list and add modules to current list
* of modules, if they're not present already
*/
/* LINTED: pointer cast */
do {
/*
* Since the prog could've been renamed after its run, we
* should see if this overlaps a.out. If it does, it is
* probably the renamed aout. We should also skip any other
* non-sharedobj's that we see (or should we report an error ?)
*/
(!is_shared_obj(so_path))) {
/* LINTED: pointer cast */
newmodp = (ProfModule *)
#ifdef DEBUG
if (debug & MODULEDEBUG) {
(void) printf(
"[process_modules] `%s'\n", so_path);
(void) printf(" skipped\n");
}
#endif /* DEBUG */
continue;
}
#ifdef DEBUG
if (debug & MODULEDEBUG)
#endif /* DEBUG */
/*
* Check all modules (leave the first one, 'cos that
* is the program executable info). If this module is already
* there in the list, update the load addresses and proceed.
*/
/*
* We expect the full pathname for all shared objects
* needed by the program executable. In this case, we
* simply need to compare the paths to see if they are
* the same file.
*/
break;
/*
* Check if this new shared object will overlap
* any existing module. If yes, remove the old one
* from the linked list (but don't free it, 'cos
* there may be symbols referring to this module
* still)
*/
#ifdef DEBUG
if (debug & MODULEDEBUG) {
(void) printf(
"[process_modules] `%s'\n",
so_path);
(void) printf(
" overlaps\n");
(void) printf(
" `%s'\n",
}
#endif /* DEBUG */
}
}
/* Module already there, skip it */
/* LINTED: pointer cast */
newmodp = (ProfModule *)
#ifdef DEBUG
if (debug & MODULEDEBUG) {
(void) printf("[process_modules] base=%#llx, "
}
#endif /* DEBUG */
continue;
}
/*
* Check if gmon.out is outdated with respect to the new
* module we want to add
*/
"%s: shared obj outdates prof info\n", whoami);
}
/* Create a new module element */
if (new_module == NULL) {
whoami, sizeof (mod_info_t));
}
/* and fill in info... */
}
#ifdef DEBUG
if (debug & MODULEDEBUG) {
(void) printf(
"[process_modules] base=%#llx, end=%#llx\n",
}
#endif /* DEBUG */
/* Create this module's nameslist */
/* Add it to the tail of active module list */
n_modules++;
#ifdef DEBUG
if (debug & MODULEDEBUG) {
(void) printf(
"[process_modules] total shared objects = %ld\n",
n_modules - 1);
}
#endif /* DEBUG */
/*
* Move to the next module in the PROF_MODULES_T list
* (if present)
*/
/* LINTED: pointer cast */
} while (more_modules);
}
/*
 * Intended to leave the program executable as the only active module
 * (see the comment in the body).
 *
 * NOTE(review): as written, the body only declares `mi' and contains no
 * statement that walks the module list or changes any module, so the
 * function currently has no effect -- confirm whether the deactivation
 * loop is missing.
 */
static void
reset_active_modules(void)
{
mod_info_t *mi;
/* Except the executable, no other module should remain active */
}
static void
{
/*
* Before processing a new gmon.out, all modules except the
* program executable must be made inactive, so that symbols
* are searched only in the program executable, if we don't
* find a MODULES_T object. Don't do it *after* we read a gmon.out,
* because we need the active module data after we're done with
* the last gmon.out, if we're doing summing.
*/
/* LINTED: pointer cast */
#ifdef DEBUG
{
unsigned int type = 0;
if (debug & MONOUTDEBUG) {
(void) printf(
"\n[getpfiledata] object %s [%#lx]\n",
}
}
#endif /* DEBUG */
case PROF_MODULES_T :
break;
case PROF_CALLGRAPH_T :
found_cgraph = TRUE;
break;
case PROF_BUFFER_T :
break;
default :
"%s: unknown prof object type=%d\n",
}
/* LINTED: pointer cast */
}
if (!found_cgraph || !found_pcsamples) {
}
}
if (first_file)
first_file = FALSE;
}
static void
{
/*
* the rest of the file consists of
* a bunch of <from,self,count> tuples.
*/
/* CONSTCOND */
while (1) {
if (rflag) {
if (Bflag) {
/*
* If rflag is set then this is a profiled
* image generated by rtld. It needs to be
* 'converted' to the standard data format.
*/
if (fread(&rtld_arc64,
break;
else
} else {
/*
* If rflag is set then this is a profiled
* image generated by rtld. It needs to be
* 'converted' to the standard data format.
*/
break;
else
}
} else {
if (Bflag) {
pfile) != 1) {
break;
}
} else {
/*
* If these aren't big %pc's, we need to read
* into the 32-bit raw arc structure, and
* assign the members into the actual arc.
*/
break;
}
}
#ifdef DEBUG
if (debug & SAMPLEDEBUG) {
(void) printf("[getpfile] frompc 0x%llx selfpc "
}
#endif /* DEBUG */
/*
* add this arc
*/
}
if (first_file)
first_file = FALSE;
}
static void
{
sztype i;
if (samples == 0) {
sizeof (unsigned_UNIT));
if (samples == 0) {
"%s: No room for %d sample pc's\n",
}
}
for (i = 0; i < nsamples; i++) {
break;
}
if (i != nsamples) {
"%s: unexpected EOF after reading %d/%d samples\n",
}
}
static void *
{
int fd;
bool invalid_version;
/*
* Check versioning info. For now, let's say we provide
* backward compatibility, so we accept all older versions.
*/
perror("fread()");
}
}
if (invalid_version) {
}
/*
* Map gmon.out onto memory.
*/
}
}
if (fmem == MAP_FAILED) {
}
/*
* Before we close this fd, save this gmon.out's info to later verify
* if the shared objects it references have changed since the time
* they were used to generate this gmon.out
*/
}
return ((void *) fmem);
}
static void *
{
unsigned long magic_num;
static bool first_time = TRUE;
extern bool old_style;
}
/*
* Read in the magic. Note that we changed the cast "unsigned long"
* to "unsigned int" because that's how h_magic is defined in the
* new format ProfHeader.
*/
perror("fread()");
}
/*
* First check if this is versioned or *old-style* gmon.out
*/
if (magic_num == (unsigned int)PROF_MAGIC) {
"profiled files\n", whoami);
}
first_time = FALSE;
}
"profiled files\n", whoami);
}
first_time = FALSE;
fsz = 0;
/*
* Now, we need to determine if this is a run-time linker
* profiled file or if it is a standard gmon.out.
*
* We do this by checking if magic matches PRF_MAGIC. If it
* does, then this is a run-time linker profiled file, if it
* doesn't, it must be a gmon.out file.
*/
else
if (rflag) {
if (Bflag) {
/*
* If the rflag is set then the input file is
* rtld profiled data, we'll read it in and convert
* it to the standard format (ie: make it look like
* a gmon.out file).
*/
perror("fread()");
}
"%s: expected version %d, "
"got version %d when processing 64-bit "
"run-time linker profiled file.\n",
}
} else {
/*
* If the rflag is set then the input file is
* rtld profiled data, we'll read it in and convert
* it to the standard format (ie: make it look like
* a gmon.out file).
*/
perror("fread()");
}
"%s: expected version %d, "
"got version %d when processing "
"run-time linker profiled file.\n",
}
}
} else {
if (Bflag) {
perror("fread()");
}
} else {
/*
* If we're not reading big %pc's, we need to read
* the 32-bit header, and assign the members to
* the actual header.
*/
perror("fread()");
}
}
}
/*
* perform sanity check on profiled file we've opened.
*/
if (rflag)
"%s: badly formed profiled data.\n",
filename);
else
"%s: badly formed gmon.out file.\n",
filename);
}
"%s: incompatible with first gmon file\n",
filename);
}
h = tmp;
#ifdef DEBUG
if (debug & SAMPLEDEBUG) {
"0x%llx hdr.ncnt %lld\n",
(void) printf(
"[openpfile] s_lowpc 0x%llx s_highpc 0x%llx\n",
(void) printf(
"[openpfile] lowpc 0x%llx highpc 0x%llx\n",
(void) printf("[openpfile] sampbytes %d nsamples %d\n",
}
#endif /* DEBUG */
return ((void *) pfile);
}
/*
* Information from a gmon.out file depends on whether it's versioned
* or non-versioned, *old style* gmon.out. If old-style, it is in two
* parts : an array of sampling hits within pc ranges, and the arcs. If
* versioned, it contains a header, followed by any number of
* modules/callgraph/pcsample_buffer objects.
*/
static void
{
void *handle;
if (old_style) {
return;
}
}
int
{
char **sp;
int c;
int errflg;
debug = 0;
first_file = TRUE;
switch (c) {
case 'a':
break;
case 'b':
break;
case 'c':
break;
case 'C':
break;
case 'd':
break;
case 'D':
break;
case 'E':
break;
case 'e':
break;
case 'F':
break;
case 'f':
break;
case 'l':
break;
case 'n':
break;
case 's':
break;
case 'z':
break;
case '?':
errflg++;
}
if (errflg) {
"usage: gprof [ -abcCDlsz ] [ -e function-name ] "
"[ -E function-name ]\n\t[ -f function-name ] "
"[ -F function-name ]\n\t[ image-file "
"[ profile-file ... ] ]\n");
}
} else {
}
} else {
}
/*
* turn off default functions
*/
}
/*
* how many ticks per second?
* if we can't tell, report time in ticks.
*/
if (hz == -1) {
hz = 1;
}
/*
* get information about mon.out file(s).
*/
do {
else
optind++;
/*
* dump out a gmon.sum file if requested
*/
if (old_style) {
/*
* assign samples to procedures
*/
asgnsamples();
}
/*
* assemble the dynamic profile
*/
timesortnlp = doarcs();
/*
* print the dynamic profile
*/
#ifdef DEBUG
/* raw output of all symbols in all their glory */
int i;
(void) printf(" Name, pc_entry_pt, svalue, tix_in_routine, "
"#calls, selfcalls, index \n");
if (timesortnlp[i]->name)
else
(void) printf(" <cycle> ");
(void) printf(" \n");
}
}
#endif /* DEBUG */
/*
* print the flat profile
*/
printprof();
/*
* print the index
*/
printindex();
/*
* print the modules
*/
printmodules();
done();
/* NOTREACHED */
return (0);
}