madv.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <sys/shm.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/auxv.h>
#include <stdarg.h>
#include <syslog.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <procfs.h>
#include <dlfcn.h>
#include <assert.h>
#include <libintl.h>
#include <locale.h>
/*
 * gmatch() is used for pattern matching of executable names and
 * arguments; no header providing its prototype is included above, so it
 * is declared explicitly here.
 */
extern int gmatch(const char *s, const char *p);
/* Register __madvmain() as this object's initialization routine. */
#pragma init(__madvmain)
/*
 * Private error stream opened from MADVERRFILE.  While NULL, madverr()
 * sends diagnostics to syslog instead.
 */
static FILE *errfp = NULL;
/* Identifier placed at the front of every diagnostic message. */
static const char *madvident = "madv.so.1";
/* System page size, filled in by __madvmain() via sysconf(). */
static int pagesize;
/*
 * Configured advice per mapping class.  Each value is an index into
 * legal_madvice[], or -1 when no advice has been configured.
 *
 * advice_all is the global default: __madvmain() copies it into
 * advice_heap, advice_shm and advice_map when those are still unset.
 */
static int advice_all = -1;
/* Advice applied to the heap by __madvmain(). */
static int advice_heap = -1;
/* Fallback advice for shmat() when neither ism nor dism advice applies. */
static int advice_shm = -1;
/* Advice for shmat() calls made with SHM_SHARE_MMU. */
static int advice_ism = -1;
/* Advice for shmat() calls made with SHM_PAGEABLE. */
static int advice_dism = -1;
/* Fallback advice for mmap()/mmap64() when no more specific one applies. */
static int advice_map = -1;
/* Advice for MAP_SHARED mappings. */
static int advice_mapshared = -1;
/* Advice for MAP_PRIVATE mappings. */
static int advice_mapprivate = -1;
/* Advice for MAP_ANON mappings. */
static int advice_mapanon = -1;
/* environment variables */
#define	ENV_MADV	"MADV"
#define	ENV_MADVCFGFILE	"MADVCFGFILE"
#define	ENV_MADVERRFILE	"MADVERRFILE"
/* config file */
#define	DEF_MADVCFGFILE	"/etc/madv.conf"
/*
 * Longest accepted configuration line: a path name plus room for the
 * argument pattern and advice options.  The expansion is parenthesized
 * so that the macro evaluates as a single value inside any expression
 * (for example when multiplied or subtracted from).
 */
#define	MAXLINELEN	(MAXPATHLEN + 64)
/* separates the "exec [args]" part of a line from its advice options */
#define	CFGDELIMITER	':'
/* separates the exec name from the optional argument pattern */
#define	ARGDELIMITER	' '
/*
 * avoid malloc which causes certain applications to crash
 *
 * lbuf holds the configuration line currently being parsed and pbuf
 * holds the current working directory (with an exec name appended to it
 * while matching).
 */
static char lbuf[MAXLINELEN];
static char pbuf[MAXPATHLEN];
#ifdef MADVDEBUG
#define	ENV_MADVDEBUG	"MADVDEBUG"
/* Bit mask of enabled debug classes, read from the MADVDEBUG variable. */
static int madvdebug = 0;
/*
 * Print a debug message when any bit of mask x is enabled in madvdebug.
 * y is a complete, parenthesized fprintf() argument list.  The body is
 * wrapped in do/while (0) so that the macro behaves as one statement and
 * can be used safely in an unbraced if/else; every use must be followed
 * by a semicolon.
 */
#define	MADVPRINT(x, y) \
	do { \
		if (madvdebug & (x)) \
			(void) fprintf y; \
	} while (0)
#else
/* Debug output compiled out. */
#define	MADVPRINT(x, y)
#endif
/*
 * Advice options.
 *
 * Option keywords accepted in the advice part of a configuration line.
 * advice_opts() passes this table to getsubopt() and switches on the
 * returned index using enum optenum, so the two lists must be kept in
 * the same order.  The table is NULL-terminated.
 */
static char *legal_optstr[] = {
"madv",
"heap",
"shm",
"ism",
"dism",
"map",
"mapshared",
"mapprivate",
"mapanon",
NULL
};
/*
 * Index of each keyword in legal_optstr[]; used as the case labels in
 * advice_opts().
 */
enum optenum {
OPT_MADV,
OPT_HEAP,
OPT_SHM,
OPT_ISM,
OPT_DISM,
OPT_MAP,
OPT_MAPSHARED,
OPT_MAPPRIVATE,
OPT_MAPANON
};
/*
 * Advice values.
 * These need to correspond to the order of the MADV_ flags in mman.h
 * since the position of a name in this table implies the value of the
 * flag: strtoadv() returns the table index and that index is what is
 * handed to memcntl() as the advice.
 *
 * NOTE(review): the "*_NOT_SUPPORTED!" entries appear to be placeholders
 * that keep the later names at their correct positions while making the
 * corresponding advice practically unselectable - confirm.
 */
static char *legal_madvice[] = {
"normal",
"random",
"sequential",
"willneed_NOT_SUPPORTED!",
"dontneed_NOT_SUPPORTED!",
"free_NOT_SUPPORTED!",
"access_default",
"access_lwp",
"access_many",
NULL
};
/*
 * Message catalog domain passed to dgettext() for every diagnostic.
 * A fallback is supplied here only when TEXT_DOMAIN has not already
 * been defined (for example on the compiler command line).
 */
#if !defined(TEXT_DOMAIN)
#define TEXT_DOMAIN "SYS_TEST"
#endif
/*
 * Report a printf-style diagnostic.
 *
 * fp	- stream to write the message to, or NULL to log the message
 *	  through syslog at LOG_ERR priority instead.
 * fmt	- printf-style format string, followed by its arguments.
 */
/*PRINTFLIKE2*/
static void
madverr(FILE *fp, char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	if (fp == NULL) {
		/* no private error file: fall back to the system log */
		vsyslog(LOG_ERR, fmt, args);
	} else {
		(void) vfprintf(fp, fmt, args);
	}
	va_end(args);
}
/*
 * Return the pointer to the fully-resolved path name of the process's
 * executable file obtained from the AT_SUN_EXECNAME aux vector entry.
 *
 * The aux vector is read from /proc/self/auxv one entry at a time, on
 * every call, until the AT_SUN_EXECNAME entry is found or a full entry
 * can no longer be read.  Returns NULL when the file cannot be opened or
 * when no such entry is found.
 */
static const char *
mygetexecname(void)
{
	static auxv_t auxb;
	const char *name = NULL;
	int fd;

	fd = open("/proc/self/auxv", O_RDONLY);
	if (fd < 0)
		return (NULL);

	for (;;) {
		/* stop at end of file, on error, or on a partial entry */
		if (read(fd, &auxb, sizeof (auxv_t)) != sizeof (auxv_t))
			break;
		if (auxb.a_type == AT_SUN_EXECNAME) {
			name = auxb.a_un.a_ptr;
			break;
		}
	}
	(void) close(fd);
	return (name);
}
/*
 * Return the process's current brk base and size.
 *
 * base	- receives pr_brkbase from /proc/self/status.
 * size	- receives pr_brksize from /proc/self/status.
 *
 * Returns 0 on success, in which case *base and *size are valid.
 * Otherwise returns a non-zero errno value and leaves *base and *size
 * untouched: the errno from open() or read() when the call itself
 * failed, or EIO when read() returned fewer bytes than a full pstatus_t.
 * A short read does not set errno, so reporting errno in that case could
 * yield a stale value - possibly 0, which the caller would take for
 * success.
 */
static int
mygetbrk(uintptr_t *base, size_t *size)
{
	pstatus_t ps;
	ssize_t nread;
	int fd;
	int rc;

	fd = open("/proc/self/status", O_RDONLY);
	if (fd < 0)
		return (errno);

	nread = read(fd, &ps, sizeof (ps));
	if (nread == (ssize_t)sizeof (ps)) {
		*base = ps.pr_brkbase;
		*size = ps.pr_brksize;
		rc = 0;
	} else if (nread < 0) {
		/* capture errno before close() has a chance to change it */
		rc = errno;
	} else {
		/* truncated status structure: errno is not meaningful */
		rc = EIO;
	}
	(void) close(fd);
	return (rc);
}
/*
 * Check if exec name matches cfgname found in madv cfg file.
 *
 * execname	- path of the running executable; may be NULL when it
 *		  could not be determined, in which case nothing matches.
 * cfgname	- pattern from the configuration file.
 * cwd		- buffer of MAXPATHLEN bytes holding the current
 *		  directory with a trailing '/'.
 *
 * When cfgname is an absolute pattern and execname is relative, execname
 * is appended to cwd IN PLACE and the combined path is matched; the
 * caller must truncate cwd back to its original length afterwards.  If
 * the combined path would not fit in MAXPATHLEN bytes the entry is
 * treated as not matching instead of writing past the end of the buffer.
 *
 * Returns the result of gmatch(): non-zero on a match, 0 otherwise.
 */
static int
fnmatch(const char *execname, char *cfgname, char *cwd)
{
	const char *ename;
	int rc;

	if (execname == NULL || cfgname == NULL)
		return (0);

	/* cfgname should not have a '/' unless it begins with one */
	if (cfgname[0] == '/') {
		/*
		 * if execname does not begin with a '/', prepend the
		 * current directory.
		 */
		if (execname[0] != '/') {
			if (cwd == NULL ||
			    strlen(cwd) + strlen(execname) >= MAXPATHLEN)
				return (0);
			ename = (const char *)strcat(cwd, execname);
		} else {
			ename = execname;
		}
	} else { /* simple cfg name */
		ename = strrchr(execname, '/');
		if (ename != NULL) {
			/* execname is a path name - get the base name */
			ename++;
		} else {
			ename = execname;
		}
	}
	rc = gmatch(ename, cfgname);
	MADVPRINT(2, (stderr, "gmatch: %s %s %s %d\n",
	    cfgname, ename, execname, rc));
	return (rc);
}
/*
 * Check if string matches any of exec arguments.
 *
 * The argument count and argument vector address are taken from
 * /proc/self/psinfo.  Arguments 1 .. pr_argc-1 (argument 0 is skipped)
 * are compared against the pattern str with gmatch(), stopping at the
 * first match.
 *
 * Returns the non-zero gmatch() result for the first matching argument,
 * or 0 when nothing matches or psinfo cannot be opened or read (a
 * diagnostic is issued through madverr() in the latter cases).
 */
static int
argmatch(char *str)
{
	psinfo_t pi;
	char **argp;
	int matched = 0;
	int idx;
	int fd;

	fd = open("/proc/self/psinfo", O_RDONLY);
	if (fd < 0) {
		madverr(errfp, dgettext(TEXT_DOMAIN,
		    "%s: /proc/self/psinfo open failed [%s]\n"),
		    madvident, strerror(errno));
		return (matched);
	}

	if (read(fd, &pi, sizeof (pi)) != sizeof (pi)) {
		madverr(errfp, dgettext(TEXT_DOMAIN,
		    "%s: /proc/self/psinfo read failed [%s]\n"),
		    madvident, strerror(errno));
	} else {
		/* start at the second entry of the argument vector */
		argp = (char **)pi.pr_argv + 1;
		MADVPRINT(2, (stderr, "argmatch: %s ", str));
		for (idx = 1; idx < pi.pr_argc; idx++, argp++) {
			matched = gmatch(*argp, str);
			if (matched != 0) {
				MADVPRINT(2, (stderr, "%s ", *argp));
				break;
			}
		}
		MADVPRINT(2, (stderr, "%d\n", matched));
	}
	(void) close(fd);
	return (matched);
}
/*
 * Return non-zero when str consists solely of newlines, spaces and tabs
 * (including the case of a zero-length string), 0 otherwise.
 */
static int
empty(char *str)
{
	const char *p;

	/* skip over leading blanks; anything left means "not empty" */
	for (p = str; *p == '\n' || *p == ' ' || *p == '\t'; p++)
		;
	return (*p == '\0');
}
/*
 * Translate an advice name into its index in legal_madvice[].
 *
 * The lookup is done with getsubopt(), which may write into advstr
 * while splitting off the first token.  Returns the index of the
 * matching name, or the negative getsubopt() result when the name is not
 * in the table.
 */
static int
strtoadv(char *advstr)
{
	char *cursor = advstr;
	char *unused_value;
	int idx;

	idx = getsubopt(&cursor, legal_madvice, &unused_value);
	return (idx);
}
/*
 * Parse the advice options of one configuration file entry and record
 * them in the advice_* variables.
 *
 * optstr	- comma separated "option=advice" list; split in place
 *		  by getsubopt().
 * execname	- executable name, used only in diagnostics.
 * cfgfile	- configuration file name, used only in diagnostics.
 * lineno	- line number of the entry, used only in diagnostics.
 *
 * Parsing stops at the first unknown option, option without a value, or
 * unknown advice name.  The "madv" option always overwrites advice_all;
 * every other option is stored only if it has not been set before,
 * otherwise a duplicate-advice diagnostic is issued and parsing goes on.
 */
static void
advice_opts(char *optstr, const char *execname, char *cfgfile, int lineno)
{
	char *value;
	int *slot;
	int advice;
	int opt;

	while (*optstr != '\0') {
		opt = getsubopt(&optstr, legal_optstr, &value);
		if (opt < 0) {
			madverr(errfp, dgettext(TEXT_DOMAIN,
			    "%s: invalid advice option (%s)"
			    " for %s - cfgfile: %s, line: %d\n"),
			    madvident, value, execname, cfgfile, lineno);
			return;
		}
		if (value == NULL) {
			madverr(errfp, dgettext(TEXT_DOMAIN,
			    "%s: option missing advice"
			    " for %s - cfgfile: %s, line: %d\n"),
			    madvident, execname, cfgfile, lineno);
			return;
		}

		advice = strtoadv(value);
		if (advice < 0) {
			madverr(errfp, dgettext(TEXT_DOMAIN,
			    "%s: invalid advice specified (%s)"
			    " for %s - cfgfile: %s, line: %d\n"),
			    madvident, value, execname, cfgfile, lineno);
			return;
		}

		/* pick the variable that this option controls */
		switch (opt) {
		case OPT_MADV:
			/* global advice: no duplicate check */
			advice_all = advice;
			continue;
		case OPT_HEAP:
			slot = &advice_heap;
			break;
		case OPT_SHM:
			slot = &advice_shm;
			break;
		case OPT_ISM:
			slot = &advice_ism;
			break;
		case OPT_DISM:
			slot = &advice_dism;
			break;
		case OPT_MAP:
			slot = &advice_map;
			break;
		case OPT_MAPSHARED:
			slot = &advice_mapshared;
			break;
		case OPT_MAPPRIVATE:
			slot = &advice_mapprivate;
			break;
		case OPT_MAPANON:
			slot = &advice_mapanon;
			break;
		default:
			madverr(errfp, dgettext(TEXT_DOMAIN,
			    "%s: invalid advice option (%s)"
			    " for %s - cfgfile: %s, line: %d\n"),
			    madvident, value, execname, cfgfile, lineno);
			continue;
		}

		/* first setting wins; later ones are reported and ignored */
		if (*slot < 0) {
			*slot = advice;
		} else {
			madverr(errfp, dgettext(TEXT_DOMAIN,
			    "%s: duplicate advice specified "
			    "(%s) for %s - cfgfile: %s, line: %d\n"),
			    madvident, value, execname, cfgfile,
			    lineno);
		}
	}
}
/*
 * Library initialization routine (registered with #pragma init).
 *
 * 1. Opens the private error file named by MADVERRFILE, if any.
 * 2. Takes the global advice from the MADV environment variable.
 * 3. Reads the configuration file (MADVCFGFILE or /etc/madv.conf) and
 *    applies the advice options of the first entry whose exec name, and
 *    argument pattern if present, match this process.
 * 4. Propagates the global advice to the heap, shm and map classes that
 *    are still unset.
 * 5. Applies the heap advice to the existing heap with memcntl().
 *
 * If the executable name or the current directory cannot be determined
 * while a configuration file is open, initialization is abandoned; the
 * configuration file and the error file are closed on that path too.
 */
static void
__madvmain()
{
	char *cfgfile, *errfile;
	FILE *fp = NULL;
	const char *execname;
	char *cwd;
	int cwdlen;
	char *tok, *tokadv, *tokarg;
	char *str, *envadv;
	int lineno = 0;
	int advice;
	uintptr_t brkbase, brkend;
	size_t brksize;
	int rc;
	char *locale = NULL;

	/*
	 * If a private error file is indicated then set the locale
	 * for error messages for the duration of this routine.
	 * Error messages destined for syslog should not be translated
	 * and thus come from the default C locale.
	 *
	 * NOTE(review): setlocale() returns the name of the locale it has
	 * just selected, so the restore at the end of this routine appears
	 * to re-apply that same locale rather than the one in effect on
	 * entry - confirm whether the previous locale should be saved first.
	 */
	if ((errfile = getenv(ENV_MADVERRFILE)) != NULL) {
		errfp = fopen(errfile, "a");
		if (errfp) {
			locale = setlocale(LC_MESSAGES, "");
		} else {
			madverr(NULL, dgettext(TEXT_DOMAIN,
			    "%s: cannot open error file: %s [%s]\n"),
			    madvident, errfile, strerror(errno));
		}
	}
#ifdef MADVDEBUG
	if ((str = getenv(ENV_MADVDEBUG)) != NULL)
		madvdebug = atoi(str);
#endif
	if ((envadv = getenv(ENV_MADV)) != NULL) {
		if ((advice = strtoadv(envadv)) >= 0)
			advice_all = advice;
		else
			madverr(errfp, dgettext(TEXT_DOMAIN,
			    "%s: invalid advice specified: MADV=%s\n"),
			    madvident, envadv);
	}
	/*
	 * Open specified cfg file or default one.
	 * A missing default file is not an error.
	 */
	if ((cfgfile = getenv(ENV_MADVCFGFILE)) != NULL) {
		fp = fopen(cfgfile, "r");
		if (!fp) {
			madverr(errfp, dgettext(TEXT_DOMAIN,
			    "%s: cannot open configuration file: %s [%s]\n"),
			    madvident, cfgfile, strerror(errno));
		}
	} else {
		cfgfile = DEF_MADVCFGFILE;
		fp = fopen(cfgfile, "r");
	}
	if (fp) {
		/*
		 * Without an exec name no entry can be matched; passing a
		 * NULL name on to fnmatch() must be avoided.
		 */
		execname = mygetexecname();
		if (execname == NULL) {
			madverr(errfp, dgettext(TEXT_DOMAIN,
			    "%s: cannot determine executable name\n"),
			    madvident);
			(void) fclose(fp);
			goto out;
		}
		/*
		 * Ask for one byte less than the buffer holds so that the
		 * '/' appended below always fits.
		 */
		cwd = getcwd(pbuf, MAXPATHLEN - 1);
		if (!cwd) {
			(void) fclose(fp);
			goto out;
		}
		cwd = strcat(cwd, "/");
		cwdlen = strlen(cwd);
		while (fgets(lbuf, MAXLINELEN, fp)) {
			lineno++;
			/*
			 * Make sure line wasn't truncated.
			 */
			if (strlen(lbuf) >= MAXLINELEN - 1) {
				madverr(errfp, dgettext(TEXT_DOMAIN,
				    "%s: invalid entry, "
				    "line too long - cfgfile:"
				    " %s, line: %d\n"),
				    madvident, cfgfile, lineno);
				continue;
			}
			if (empty(lbuf))
				continue;
			/*
			 * Get advice options.
			 * Parse right to left in case delimiter is in name.
			 */
			if (!(tokadv = strrchr(lbuf, CFGDELIMITER))) {
				madverr(errfp, dgettext(TEXT_DOMAIN,
				    "%s: no delimiter specified - cfgfile:"
				    " %s, line: %d\n"),
				    madvident, cfgfile, lineno);
				continue;
			}
			*tokadv++ = '\0';
			/*
			 * Remove newline from end of advice options.
			 */
			if ((str = strrchr(tokadv, '\n')) != NULL)
				*str = '\0';
			/*
			 * Get optional argument string.
			 */
			if ((tokarg = strrchr(lbuf, ARGDELIMITER)) != NULL) {
				*tokarg++ = '\0';
			}
			/*
			 * Compare exec name.
			 * fnmatch() may append the exec name to cwd, so
			 * cwd is cut back to its original length whenever
			 * the entry is rejected.
			 */
			tok = lbuf;
			if (!fnmatch(execname, tok, cwd)) {
				tokadv = tokarg = NULL;
				cwd[cwdlen] = '\0';
				continue;
			}
			/*
			 * Compare arguments if argument string specified.
			 */
			if (tokarg &&
			    !empty(tokarg) &&
			    !argmatch(tokarg)) {
				tokadv = tokarg = NULL;
				cwd[cwdlen] = '\0';
				continue;
			}
			/*
			 * Parse advice options.
			 * If empty, any advice from ENV_MADV is reset.
			 */
			if (empty(tokadv)) {
				advice_all = -1;
			} else {
				advice_opts(tokadv, execname, cfgfile, lineno);
			}
			/* only the first matching entry is used */
			break;
		}
		(void) fclose(fp);
	}
	/*
	 * Pagesize needed for proper aligning by brk interpose.
	 */
	pagesize = sysconf(_SC_PAGESIZE);
	/*
	 * Apply global advice if set.
	 * Specific options in the cfgfile take precedence.
	 */
	if (advice_all >= 0) {
		if (advice_heap < 0)
			advice_heap = advice_all;
		if (advice_shm < 0)
			advice_shm = advice_all;
		if (advice_map < 0)
			advice_map = advice_all;
	}
	MADVPRINT(2, (stderr, "advice_all %d\n", advice_all));
	MADVPRINT(2, (stderr, "advice_heap %d\n", advice_heap));
	MADVPRINT(2, (stderr, "advice_shm %d\n", advice_shm));
	MADVPRINT(2, (stderr, "advice_ism %d\n", advice_ism));
	MADVPRINT(2, (stderr, "advice_dism %d\n", advice_dism));
	MADVPRINT(2, (stderr, "advice_map %d\n", advice_map));
	MADVPRINT(2, (stderr, "advice_mapshared %d\n", advice_mapshared));
	MADVPRINT(2, (stderr, "advice_mapprivate %d\n", advice_mapprivate));
	MADVPRINT(2, (stderr, "advice_mapanon %d\n", advice_mapanon));
	/*
	 * If heap advice is specified, apply it to the existing heap.
	 * As the heap grows the kernel applies the advice automatically
	 * to new portions of the heap.
	 */
	if (advice_heap >= 0) {
		if ((rc = mygetbrk(&brkbase, &brksize)) != 0) {
			madverr(errfp, dgettext(TEXT_DOMAIN,
			    "%s: /proc/self/status read failed [%s]\n"),
			    madvident, strerror(rc));
		} else {
			MADVPRINT(4, (stderr, "brkbase 0x%lx brksize 0x%lx\n",
			    (unsigned long)brkbase, (unsigned long)brksize));
			/*
			 * Align start address for memcntl and apply advice
			 * on full pages of heap. Create a page of heap if
			 * it does not already exist.
			 */
			brkend = roundup(brkbase+brksize, pagesize);
			brkbase = roundup(brkbase, pagesize);
			brksize = brkend - brkbase;
			if (brksize < pagesize) {
				if (sbrk(pagesize) == (void *)-1) {
					madverr(errfp, dgettext(TEXT_DOMAIN,
					    "%s: sbrk failed [%s]\n"),
					    madvident, strerror(errno));
					goto out;
				}
				brksize = pagesize;
			}
			MADVPRINT(1, (stderr, "heap advice: 0x%lx 0x%lx %d\n",
			    (unsigned long)brkbase, (unsigned long)brksize,
			    advice_heap));
			if (memcntl((caddr_t)brkbase, brksize, MC_ADVISE,
			    (caddr_t)(intptr_t)advice_heap, 0, 0) < 0) {
				madverr(errfp, dgettext(TEXT_DOMAIN,
				    "%s: memcntl() failed [%s]: heap advice\n"),
				    madvident, strerror(errno));
			}
		}
	}
out:
	if (errfp) {
		(void) fclose(errfp);
		/* do not leave a pointer to the closed stream behind */
		errfp = NULL;
		(void) setlocale(LC_MESSAGES, locale);
	} else {
		/* close log file: no-op if nothing logged to syslog */
		closelog();
	}
}
/*
* shmat interpose
*/
void *
shmat(int shmid, const void *shmaddr, int shmflag)
{
static caddr_t (*shmatfunc)() = NULL;
void *result;
int advice = -1;
struct shmid_ds mds;
#ifdef MADVDEBUG
int rc;
#else
/* LINTED */
int rc;
#endif
if (!shmatfunc) {
shmatfunc = (caddr_t (*)()) dlsym(RTLD_NEXT, "shmat");
assert(shmatfunc);
}
result = shmatfunc(shmid, shmaddr, shmflag);
/*
* Options ism, dism take precedence over option shm.
*/
if (advice_ism >= 0 && (shmflag & SHM_SHARE_MMU)) {
advice = advice_ism;
} else if (advice_dism >= 0 && (shmflag & SHM_PAGEABLE)) {
advice = advice_dism;
} else if (advice_shm >= 0) {
advice = advice_shm;
}
/*
* Apply advice if specified and shmat succeeded.
*/
if (advice >= 0 && result != (void *)-1) {
/* First determine segment size */
rc = shmctl(shmid, IPC_STAT, &mds);
MADVPRINT(4, (stderr, "shmctl rc %d errno %d\n",
strerror(errno)));
rc = memcntl(result, mds.shm_segsz, MC_ADVISE,
(caddr_t)(intptr_t)advice, 0, 0);
MADVPRINT(1, (stderr,
"shmat advice: 0x%x 0x%x %d, rc %d errno %d\n",
result, mds.shm_segsz, advice, rc, errno));
}
return (result);
}
/*
* mmap interpose
*/
caddr_t
mmap(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
{
static caddr_t (*mmapfunc)() = NULL;
caddr_t result;
int advice = -1;
#ifdef MADVDEBUG
int rc;
#else
/* LINTED */
int rc;
#endif
if (!mmapfunc) {
mmapfunc = (caddr_t (*)()) dlsym(RTLD_NEXT, "mmap");
assert(mmapfunc);
}
result = mmapfunc(addr, len, prot, flags, fd, pos);
/*
* Option mapanon has highest precedence while option map
* has lowest precedence.
*/
if (advice_mapanon >= 0 && (flags & MAP_ANON)) {
advice = advice_mapanon;
} else if (advice_mapshared >= 0 && (flags & MAP_SHARED)) {
advice = advice_mapshared;
} else if (advice_mapprivate >= 0 && (flags & MAP_PRIVATE)) {
advice = advice_mapprivate;
} else if (advice_map >= 0) {
advice = advice_map;
}
/*
* Apply advice if specified and mmap succeeded.
*/
if (advice >= 0 && result != MAP_FAILED) {
rc = memcntl(result, len, MC_ADVISE,
(caddr_t)(intptr_t)advice, 0, 0);
MADVPRINT(1, (stderr,
"mmap advice: 0x%x 0x%x %d, rc %d errno %d\n",
result, len, advice, rc, errno));
}
return (result);
}
#if !defined(_LP64)
/*
* mmap64 interpose
*/
caddr_t
mmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off64_t pos)
{
static caddr_t (*mmap64func)();
caddr_t result;
int advice = -1;
#ifdef MADVDEBUG
int rc;
#else
/* LINTED */
int rc;
#endif
if (!mmap64func) {
mmap64func = (caddr_t (*)()) dlsym(RTLD_NEXT, "mmap64");
assert(mmap64func);
}
result = mmap64func(addr, len, prot, flags, fd, pos);
/*
* Option mapanon has highest precedence while option map
* has lowest precedence.
*/
if (advice_mapanon >= 0 && (flags & MAP_ANON)) {
advice = advice_mapanon;
} else if (advice_mapshared >= 0 && (flags & MAP_SHARED)) {
advice = advice_mapshared;
} else if (advice_mapprivate >= 0 && (flags & MAP_PRIVATE)) {
advice = advice_mapprivate;
} else if (advice_map >= 0) {
advice = advice_map;
}
/*
* Apply advice if specified and mmap succeeded.
*/
if (advice >= 0 && result != MAP_FAILED) {
rc = memcntl(result, len, MC_ADVISE, (caddr_t)advice, 0, 0);
MADVPRINT(1, (stderr,
"mmap64 advice: 0x%x 0x%x %d, rc %d errno %d\n",
result, len, advice, rc, errno));
}
return (result);
}
#endif /* !_LP64 */