/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2001-2003 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>
#include <string.h>
#include <signal.h>
#include <errno.h>
#include <dirent.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <libproc.h>
#include <sys/sysmacros.h>
#include <libgen.h>
#include <thread.h>
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
/* Handle of the process currently being created/grabbed by main(). */
static struct ps_prochandle *Pr;
/* Basename of argv[0]; used as the prefix of every diagnostic. */
static char *command;
/* Set to 1 by intr() once a caught signal has been delivered. */
static volatile int interrupt;
/* Flags handed to Pgrab(); becomes PGRAB_FORCE when -F is given. */
static int Fflag;
/* Nonzero: launch a command (the default). Cleared by -p (pid list). */
static int cflag = 1;
static void intr(int);
static int setpgsz(struct ps_prochandle *, int, size_t *);
static int setpgsz_anon(struct ps_prochandle *, size_t, int);
static caddr_t setup_mha(uint_t, size_t, int);
static size_t discover_optimal_pagesize(struct ps_prochandle *,
uint_t, pid_t);
static void usage();
/*
 * Sentinel for "no page size requested" / "invalid page size".
 * atosz() returns it for a missing argument, main() uses it to
 * initialize the per-suboption table, and setpgsz() skips any entry
 * still holding it.  cnvpgsz() exits rather than return this value.
 */
#define INVPGSZ 3
/*
 * Names accepted by -o, as passed to getsubopt().  The index getsubopt()
 * returns is used directly as an enum suboptenum value and as an index
 * into the page size table, so the order here must match the enum below.
 */
static char *suboptstr[] = {
"heap",
"stack",
"anon",
NULL
};
/* Indices into suboptstr[] and into the pgsz[] table built by main(). */
enum suboptenum {
E_HEAP,
E_STACK,
E_ANON
};
/*
 * Convert a size string to a byte count.
 *
 * The string is a number in any base strtoll() accepts with base 0
 * (decimal, octal with a leading 0, hex with a leading 0x), optionally
 * followed by exactly one scale suffix: B, K, M, G or T (either case).
 *
 * Returns the size in bytes, or INVPGSZ when the argument is NULL or
 * empty, contains no digits, is negative, carries an unknown suffix or
 * trailing characters, or does not fit in a size_t after scaling.
 */
static size_t
atosz(char *optarg)
{
	const size_t limit = (size_t)-1;
	long long val;
	size_t sz;
	int scale;	/* number of times to multiply by 1024 */
	char *endptr;

	if (optarg == NULL || optarg[0] == '\0')
		return (INVPGSZ);

	errno = 0;
	val = strtoll(optarg, &endptr, 0);

	/*
	 * Without the endptr check, a string with no digits at all
	 * (e.g. "abc" or "k") would be reported as 0, which callers treat
	 * as a perfectly valid request for the default page size.
	 */
	if (endptr == optarg || errno == ERANGE || val < 0)
		return (INVPGSZ);
	if ((unsigned long long)val > (unsigned long long)limit)
		return (INVPGSZ);

	switch (*endptr) {
	case 'T':
	case 't':
		scale = 4;
		break;
	case 'G':
	case 'g':
		scale = 3;
		break;
	case 'M':
	case 'm':
		scale = 2;
		break;
	case 'K':
	case 'k':
		scale = 1;
		break;
	case 'B':
	case 'b':
		scale = 0;
		break;
	case '\0':
		return ((size_t)val);
	default:
		return (INVPGSZ);
	}

	/* Only a single suffix character may follow the number. */
	if (endptr[1] != '\0')
		return (INVPGSZ);

	/*
	 * Scale one factor at a time so that overflow is detected instead
	 * of silently wrapping to a small value that might look valid.
	 */
	sz = (size_t)val;
	for (; scale > 0; scale--) {
		if (sz > limit / 1024)
			return (INVPGSZ);
		sz *= 1024;
	}
	return (sz);
}
/* pgsz array sufficient for max page sizes */
static size_t pgsza[8 * sizeof (void *)];
/* Number of valid entries in pgsza[]; set by getpgsz(). */
static int nelem;

/*
 * Load the page sizes supported by the system into pgsza[] and record
 * their count in nelem.
 *
 * Exits with status 125 if the sizes cannot be obtained or if there are
 * more of them than pgsza[] can hold.  On return nelem is always in the
 * range 1 .. number of elements in pgsza[], so other code may index
 * pgsza[nelem - 1] safely.
 */
static void
getpgsz(void)
{
	const int maxelem = (int)(sizeof (pgsza) / sizeof (pgsza[0]));

	/*
	 * getpagesizes() reports failure with -1, so a plain "== 0" test
	 * would let an error through as a negative element count.
	 */
	nelem = getpagesizes(NULL, 0);
	if (nelem > 0 && nelem <= maxelem)
		nelem = getpagesizes(pgsza, nelem);

	if (nelem <= 0 || nelem > maxelem) {
		(void) fprintf(stderr, "%s: cannot determine system page"
		    " sizes\n", command);
		exit(125);
	}
}
static size_t
cnvpgsz(char *optarg)
{
size_t pgsz = atosz(optarg);
int i;
if (!ISP2(pgsz) || ((pgsz < pgsza[0]) && pgsz != 0)) {
pgsz = INVPGSZ;
} else {
for (i = nelem - 1; i >= 0; i--) {
if (pgsz == pgsza[i])
break;
if (pgsz > pgsza[i]) {
pgsz = INVPGSZ;
break;
}
}
}
if (pgsz == INVPGSZ) {
if (optarg != NULL) {
(void) fprintf(stderr,
"%s: invalid page size specified (%s)\n",
command, optarg);
} else {
usage();
}
exit(125);
}
return (pgsz);
}
/*
 * Print the command synopsis on stderr and exit with status 125.
 * Never returns.
 */
static void
usage()
{
	static const char synopsis[] =
	    "usage:\t%s -o option[,option] [-F] cmd | -p pid ...\n"
	    " (set preferred page size of cmd or each process)\n"
	    " -o option[,option]: options are\n"
	    " stack=sz\n"
	    " heap=sz\n"
	    " anon=sz (sz: valid page size or 0 (zero))\n"
	    " -F: force grabbing of the target process(es)\n"
	    " cmd: launch command\n"
	    " -p pid ...: process id list\n";

	(void) fprintf(stderr, synopsis, command);
	exit(125);
}
/*
 * Entry point.
 *
 * Parses "-o heap=sz,stack=sz,anon=sz", "-F" and "-p", then either
 *   - launches the command named by the remaining arguments with the
 *     requested page sizes applied and passes its exit status through, or
 *   - (with -p) applies the page sizes to each listed process.
 *
 * Exit status:
 *   125  usage error, interruption, or any failure to set a page size
 *   126  the command could not be executed
 *   127  the command could not be found
 *   otherwise the exit status of the launched command; 0 in -p mode
 *   when every pid was handled successfully.
 */
int
main(int argc, char *argv[])
{
	int rc, err = 0;
	int opt, subopt;
	int errflg = 0;
	char *options, *value;
	size_t pgsz[] = {INVPGSZ, INVPGSZ, INVPGSZ};
	pid_t pid;
	int status;

	if ((command = strrchr(argv[0], '/')) != NULL)
		command++;
	else
		command = argv[0];

	getpgsz();

	/* options */
	while ((opt = getopt(argc, argv, "o:Fp")) != EOF) {
		switch (opt) {
		case 'o':		/* options */
			options = optarg;
			while (*options != '\0') {
				subopt = getsubopt(&options, suboptstr, &value);
				switch (subopt) {
				case E_HEAP:
				case E_STACK:
				case E_ANON:
					/* cnvpgsz() exits on a bad value */
					pgsz[subopt] = cnvpgsz(value);
					break;
				default:
					errflg = 1;
					break;
				}
			}
			break;
		case 'F':		/* force grabbing (no O_EXCL) */
			Fflag = PGRAB_FORCE;
			break;
		case 'p':
			cflag = 0;
			break;
		default:
			errflg = 1;
			break;
		}
	}

	argc -= optind;
	argv += optind;

	/* At least one page size and one operand are required. */
	if ((pgsz[E_HEAP] == INVPGSZ && pgsz[E_STACK] == INVPGSZ &&
	    pgsz[E_ANON] == INVPGSZ) || errflg || argc <= 0) {
		usage();
	}

	/*
	 * catch signals from terminal; a signal that was not at its
	 * default disposition on entry is left ignored.
	 */
	if (sigset(SIGHUP, SIG_IGN) == SIG_DFL)
		(void) sigset(SIGHUP, intr);
	if (sigset(SIGINT, SIG_IGN) == SIG_DFL)
		(void) sigset(SIGINT, intr);
	if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL)
		(void) sigset(SIGQUIT, intr);
	(void) sigset(SIGTERM, intr);

	if (cflag && !interrupt) {		/* command */
		/* Pcreate() error code; kept distinct from the 'err' count */
		int perr;
		char path[PATH_MAX];

		Pr = Pcreate(argv[0], &argv[0], &perr, path, sizeof (path));
		if (Pr == NULL) {
			switch (perr) {
			case C_PERM:
				(void) fprintf(stderr,
				    "%s: cannot control set-id or "
				    "unreadable object file: %s\n",
				    command, path);
				break;
			case C_LP64:
				(void) fprintf(stderr,
				    "%s: cannot control _LP64 "
				    "program: %s\n", command, path);
				break;
			case C_NOEXEC:
				(void) fprintf(stderr, "%s: cannot execute "
				    "program: %s\n", command, argv[0]);
				exit(126);
				break;
			case C_NOENT:
				(void) fprintf(stderr, "%s: cannot find "
				    "program: %s\n", command, argv[0]);
				exit(127);
				break;
			case C_STRANGE:
				break;
			default:
				(void) fprintf(stderr,
				    "%s: %s\n", command, Pcreate_error(perr));
				break;
			}
			exit(125);
		}

		if ((rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz)) != 0) {
			(void) fprintf(stderr, "%s: set page size "
			    "failed for program: %s\n", command, argv[0]);
			(void) pr_exit(Pr, 1);
			exit(125);
		}

		/*
		 * release the command to run, wait for it and
		 * return its exit status if we can.
		 */
		Prelease(Pr, 0);
		do {
			pid = wait(&status);
		} while (pid == -1 && errno == EINTR);

		if (pid == -1) {
			(void) fprintf(stderr, "%s: wait() error: %s\n",
			    command, strerror(errno));
			exit(125);
		}

		/*
		 * Pass thru the child's exit value.
		 */
		if (WIFEXITED(status))
			exit(WEXITSTATUS(status));
		exit(status | WCOREFLG);
	}

	/* process pids */
	while (--argc >= 0 && !interrupt) {
		char *arg;
		psinfo_t psinfo;
		int gret;

		(void) fflush(stdout);	/* line-at-a-time */

		/* get the specified pid and the psinfo struct */
		arg = *argv++;
		pid = proc_arg_psinfo(arg, PR_ARG_PIDS, &psinfo, &gret);

		if (pid == -1) {
			(void) fprintf(stderr, "%s: cannot examine pid %s:"
			    " %s\n", command, arg, Pgrab_error(gret));
			/*
			 * An operand that is neither a number nor a /proc
			 * path is most likely a command name given with -p.
			 */
			if (!isdigit((unsigned char)arg[0]) &&
			    strncmp(arg, "/proc/", 6)) {
				(void) fprintf(stderr,
				    "\tdo not use -p option"
				    " to launch a command\n");
			}
			err++;
		} else if ((Pr = Pgrab(pid, Fflag, &gret)) != NULL) {
			rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz);
			if (rc != 0) {
				(void) fprintf(stderr, "%s: set page size "
				    "failed for pid: %d\n", command, (int)pid);
				err++;
			}
			Prelease(Pr, 0);
			Pr = NULL;
		} else {
			switch (gret) {
			case G_SYS:
				proc_unctrl_psinfo(&psinfo);
				(void) fprintf(stderr, "%s: cannot set page "
				    "size for system process: %d [ %s ]\n",
				    command, (int)pid, psinfo.pr_psargs);
				err++;
				break;
			case G_SELF:
				/* do it to own self */
				rc = setpgsz(NULL, psinfo.pr_dmodel, pgsz);
				if (rc != 0) {
					(void) fprintf(stderr, "%s: set page "
					    "size failed for self: %d\n",
					    command, (int)pid);
					err++;
				}
				break;
			default:
				(void) fprintf(stderr, "%s: %s: %d\n",
				    command, Pgrab_error(gret), (int)pid);
				err++;
				break;
			}
		}
	}

	if (interrupt || err)
		exit(125);

	return (0);
}
/*
 * Signal handler installed by main().  It only records that a signal
 * arrived; main() polls the 'interrupt' flag between units of work.
 * The signal number itself is not used.
 */
/* ARGSUSED */
static void
intr(int sig)
{
	interrupt = 1;
}
/* ------ begin specific code ------ */

/*
 * Set the preferred page sizes of a process.
 *
 *   Pr      handle of the target process; main() passes NULL to mean
 *           "this process itself"
 *   dmodel  data model of the target, forwarded to setup_mha() and
 *           setpgsz_anon()
 *   pgsz    table indexed by enum suboptenum; entries equal to INVPGSZ
 *           are skipped
 *
 * A warning is printed for every suboption that fails.  Returns the
 * number of suboptions that failed (0 on complete success).
 */
/*ARGSUSED*/
static int
setpgsz(struct ps_prochandle *Pr, int dmodel, size_t pgsz[])
{
	int rc;
	int err = 0;
	int saved_errno;
	pid_t pid;
	caddr_t mpss;
	int i;
	/* memcntl commands, in the same order as enum suboptenum */
	static uint_t pgszcmd[] =
	{MHA_MAPSIZE_BSSBRK, MHA_MAPSIZE_STACK, MHA_MAPSIZE_VA};

	for (i = E_HEAP; i <= E_ANON; i++) {
		if (pgsz[i] == INVPGSZ)
			continue;

		if (i == E_ANON) {
			rc = setpgsz_anon(Pr, pgsz[i], dmodel);
		} else {
			mpss = setup_mha(pgszcmd[i], pgsz[i], dmodel);
			rc = pr_memcntl(Pr, NULL, 0, MC_HAT_ADVISE, mpss, 0, 0);
		}

		if (rc < 0) {
			/* Capture errno before anything can disturb it. */
			saved_errno = errno;

			/*
			 * There is no process handle to query when the
			 * caller asked us to operate on ourselves, so
			 * Pstatus() must not be used with a NULL Pr.
			 */
			if (Pr != NULL)
				pid = Pstatus(Pr)->pr_pid;
			else
				pid = getpid();

			(void) fprintf(stderr, "%s: warning: set %s page size "
			    "failed (%s) for pid %d\n", command, suboptstr[i],
			    strerror(saved_errno), (int)pid);
			err++;
		}
	}
	return (err);
}
/*
 * Walk through the process' address space segments. Set all anonymous
 * segments to the new page size.
 *
 *   Pr      handle of the target process
 *   pgsz    requested page size; 0 asks discover_optimal_pagesize() to
 *           choose one (which may itself yield 0)
 *   dmodel  data model of the target, forwarded to setup_mha()
 *
 * Shared mappings, anything overlapping the heap, anything inside the
 * stack, and segments too small to hold one aligned page are skipped.
 *
 * Returns 0 when the walk completed (per-segment failures are only
 * reported as warnings), or -1 if process information or the map file
 * could not be obtained.
 *
 * NOTE(review): Pr is handed straight to Ppsinfo()/Pstatus(); confirm
 * whether the "Pr == NULL means self" convention used by main() needs
 * to be supported here as well.
 */
static int
setpgsz_anon(struct ps_prochandle *Pr, size_t pgsz, int dmodel)
{
	caddr_t mpss;
	prmap_t map;
	uintptr_t addr;
	uintptr_t misalign;
	size_t size;
	const psinfo_t *psinfo;
	const pstatus_t *pstatus;
	int fd;
	int rc;
	char path[PATH_MAX];

	/*
	 * Setting the page size for anonymous segments on a process before it
	 * has run will have no effect, since it has not configured anonymous
	 * memory and the page size setting is not "sticky" inside the kernel.
	 * Any anonymous memory subsequently mapped will have the default page
	 * size.
	 */
	if (cflag)
		return (0);

	if ((psinfo = Ppsinfo(Pr)) == NULL)
		return (-1);
	if ((pstatus = Pstatus(Pr)) == NULL)
		return (-1);

	if (pgsz == 0)
		pgsz = discover_optimal_pagesize(Pr, dmodel, psinfo->pr_pid);

	mpss = setup_mha(MHA_MAPSIZE_VA, pgsz, dmodel);

	(void) snprintf(path, PATH_MAX, "/proc/%d/map", (int)psinfo->pr_pid);
	if ((fd = open(path, O_RDONLY)) < 0)
		return (-1);

	while (read(fd, &map, sizeof (map)) == sizeof (map)) {
		if ((map.pr_mflags & MA_ANON) == 0) {
			/* Not anon. */
			continue;
		} else if (map.pr_mflags & MA_SHARED) {
			/* Can't change pagesize for shared mappings. */
			continue;
		} else if (map.pr_vaddr + map.pr_size >
		    pstatus->pr_brkbase &&
		    map.pr_vaddr <
		    pstatus->pr_brkbase + pstatus->pr_brksize) {
			/* Heap. */
			continue;
		} else if (map.pr_vaddr >= pstatus->pr_stkbase &&
		    map.pr_vaddr + map.pr_size <=
		    pstatus->pr_stkbase + pstatus->pr_stksize) {
			/* Stack. */
			continue;
		} else if (map.pr_size < pgsz) {
			/* Too small. */
			continue;
		}

		if (pgsz == 0) {
			/* No alignment constraint: cover the whole segment. */
			addr = map.pr_vaddr;
			size = map.pr_size;
		} else {
			/*
			 * Find the first address in the segment that is
			 * page-aligned.
			 */
			misalign = map.pr_vaddr % pgsz;
			if (misalign == 0)
				addr = map.pr_vaddr;
			else
				addr = map.pr_vaddr + (pgsz - misalign);

			/*
			 * Calculate how many pages will fit in the segment:
			 * drop the unaligned bytes skipped at the front
			 * (addr - pr_vaddr) and the unaligned tail.  The
			 * front part must be a difference, not a modulo by
			 * pr_vaddr: the latter is wrong for low addresses
			 * and divides by zero for a mapping at address 0.
			 * Since pr_size >= pgsz here, the aligned start
			 * never lies beyond the aligned end, so this cannot
			 * underflow.
			 */
			size = map.pr_size - (addr - map.pr_vaddr) -
			    ((map.pr_vaddr + map.pr_size) % pgsz);
		}

		/*
		 * If no aligned pages fit in the segment, ignore it.
		 */
		if (size < pgsz) {
			continue;
		}

		rc = pr_memcntl(Pr, (caddr_t)addr, size,
		    MC_HAT_ADVISE, mpss, 0, 0);

		/*
		 * If an error occurs on any segment, report the error here and
		 * then go on to try setting the page size for the remaining
		 * segments.
		 */
		if (rc < 0) {
			(void) fprintf(stderr, "%s: warning: set page size "
			    "failed (%s) for pid %d for anon segment at "
			    "address: %p\n", command, strerror(errno),
			    (int)psinfo->pr_pid, (void *)map.pr_vaddr);
		}
	}

	(void) close(fd);
	return (0);
}
/*
 * Discover the optimal page size for the process.
 * Do this by creating a scratch anonymous segment in the target process
 * whose length and alignment are the last entry of pgsza[] (the largest
 * page size, assuming pgsza[] is filled in ascending order), setting its
 * pagesize to 0, and reading the xmap file to discover the page size
 * selected by the system.  The scratch segment is removed again before
 * returning.
 *
 *   Pr      handle of the target process
 *   dmodel  data model of the target, forwarded to setup_mha()
 *   pid     process id of the target, used to locate /proc/<pid>/xmap
 *
 * Returns the page size reported for the scratch segment, or 0 if it
 * could not be determined.
 */
static size_t
discover_optimal_pagesize(struct ps_prochandle *Pr, uint_t dmodel, pid_t pid)
{
	size_t size = 0;
	size_t len;
	prxmap_t xmap;
	caddr_t mha;
	void *addr = MAP_FAILED;
	int fd;
	char path[PATH_MAX];

	/* Without a page size table there is nothing to probe with. */
	if (nelem <= 0)
		return (0);
	len = pgsza[nelem - 1];

	(void) snprintf(path, PATH_MAX, "/proc/%d/xmap", (int)pid);
	if ((fd = open(path, O_RDONLY)) < 0)
		return (size);

	/* With MAP_ALIGN the address argument carries the alignment. */
	if ((addr = pr_mmap(Pr, (void *)len, len, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON | MAP_ALIGN, -1, 0)) == MAP_FAILED) {
		goto out;
	}

	mha = setup_mha(MHA_MAPSIZE_VA, 0, dmodel);
	if (pr_memcntl(Pr, addr, len, MC_HAT_ADVISE, mha, 0, 0) < 0) {
		goto out;
	}

	/*
	 * Touch a page in the segment so the hat mapping gets created.
	 */
	(void) Pwrite(Pr, &len, sizeof (len), (uintptr_t)addr);

	/*
	 * Read through the address map looking for our segment.  The
	 * result is taken only from a record that was actually read and
	 * matched, so an empty or short xmap file simply yields 0 rather
	 * than a decision based on an uninitialized record.
	 */
	while (read(fd, &xmap, sizeof (xmap)) == sizeof (xmap)) {
		if (xmap.pr_vaddr == (uintptr_t)addr) {
			size = xmap.pr_hatpagesize;
			break;
		}
	}

out:
	if (addr != MAP_FAILED) {
		if (pr_munmap(Pr, addr, len) == -1) {
			(void) fprintf(stderr,
			    "%s: couldn't delete segment at %p\n",
			    command, addr);
		}
	}
	(void) close(fd);

	return (size);
}
/*
 * Argument buffers handed to memcntl(MC_HAT_ADVISE).  They are static, so
 * the pointer returned by setup_mha() is only good until the next call.
 */
static struct memcntl_mha gmha;
#ifdef _LP64
static struct memcntl_mha32 gmha32;
#endif

/*
 * Fill in the MC_HAT_ADVISE argument structure for the given command
 * and page size and return its address.  When built _LP64 and the
 * target's data model is PR_MODEL_ILP32, the 32-bit layout is used;
 * otherwise the native one is.  Flags are always cleared.
 */
/* ARGSUSED */
static caddr_t
setup_mha(uint_t command, size_t pagesize, int dmodel)
{
	struct memcntl_mha *native = &gmha;

#ifdef _LP64
	if (dmodel == PR_MODEL_ILP32) {
		struct memcntl_mha32 *narrow = &gmha32;

		narrow->mha_cmd = command;
		narrow->mha_flags = 0;
		narrow->mha_pagesize = pagesize;
		return ((caddr_t)narrow);
	}
#endif

	native->mha_cmd = command;
	native->mha_flags = 0;
	native->mha_pagesize = pagesize;
	return ((caddr_t)native);
}