/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
#include "lint.h"
#include "thr_uberdata.h"
#include <sys/syscall.h>
/*
 * Raw system call entry point.  initial_allocation() uses it to issue
 * SYS_mmap without going through the mmap() wrapper; it treats a nonzero
 * return as failure and reads the result from the sysret_t's sys_rval1.
 */
extern long __systemcall6(sysret_t *, int, ...);
/*
* This is a small and simple power of two memory allocator that is
* used internally by libc. Allocations are fast and memory is never
* returned to the system, except for allocations of 64 Kbytes and larger,
* which are simply mmap()ed and munmap()ed as needed. Smaller allocations
* (minimum size is 64 bytes) are obtained from mmap() of 64K chunks
* broken up into unit allocations and maintained on free lists.
* The interface requires the caller to keep track of the size of an
* allocated block and to pass that size back when freeing a block.
*
* This allocator is called during initialization, from code called
* from the dynamic linker, so it must not call anything that might
* re-invoke the dynamic linker to resolve a symbol. That is,
* it must only call functions that are wholly private to libc.
*
* Also, this allocator must be unique across all link maps
* because pointers returned by lmalloc() are stored in the
* thread structure, which is constant across all link maps.
*
* Memory blocks returned by lmalloc() are initialized to zero.
*/
/* Smallest allocation unit in bytes; the block size of bucket 0. */
#define MINSIZE 64 /* (1 << MINSHIFT) */
/* log2(MINSIZE); subtracted from the high-bit position to get a bucket. */
#define MINSHIFT 6
/*
 * Unit (and MAP_ALIGN alignment) of the mmap() requests that feed the
 * free lists; requests rounded to this size or more are mmap()ed directly.
 */
#define CHUNKSIZE (64 * 1024)
/*
* bucketnum allocation size
* 0 64
* 1 128
* 2 256
* 3 512
* 4 1024
* 5 2048
* 6 4096
* 7 8192
* 8 16384
* 9 32768
*/
/*
* See "thr_uberdata.h" for the definition of bucket_t.
* The 10 (NBUCKETS) buckets are allocated in uberdata.
*/
/*
* Performance hack:
*
* On the very first lmalloc(), before any memory has been allocated,
* mmap() a 24K block of memory and carve out six 2K chunks, each
* of which is subdivided for the initial allocations from buckets
* 0, 1, 2, 3, 4 and 5, giving them initial numbers of elements
* 32, 16, 8, 4, 2 and 1, respectively. The remaining 12K is cut
* into one 4K buffer for bucket 6 and one 8K buffer for bucket 7.
*
* This results in almost all simple single-threaded processes,
* such as those employed in the kenbus test suite, having to
* allocate only this one 24K block during their lifetimes.
*/
/* Bytes of the initial mapping handed to each of buckets 0 through 5. */
#define SUBCHUNKSIZE 2048
/* Total size of the initial mapping: 6 * 2K + 4K + 8K. */
#define BASE_SIZE (24 * 1024)
/*
 * Seed the free lists of buckets 0 through 7 from one BASE_SIZE mapping.
 *
 * bp points at the first element of the bucket array.  Buckets 0-5 each
 * receive SUBCHUNKSIZE bytes cut into elements of their own size and
 * linked into a NULL-terminated list; bucket 6 receives a single 4K
 * element and bucket 7 a single 8K element.  Panics (via thr_panic())
 * if the mapping cannot be obtained.
 */
static void
initial_allocation(bucket_t *bp)	/* &__uberdata.bucket[0] */
{
	sysret_t rval;
	caddr_t base;
	caddr_t cur;
	size_t elsize;
	size_t nelem;
	size_t i;
	int b;

	/*
	 * The system call is issued through __systemcall6() rather than
	 * mmap() so that a failure never reaches __cerror()/___errno():
	 * this early in libc initialization curthread is NULL and
	 * dereferencing it would produce a hard-to-debug SIGSEGV.
	 * A thread panic message is the preferred outcome.
	 */
	if (__systemcall6(&rval, SYS_mmap, CHUNKSIZE, BASE_SIZE,
	    PROT_READ | PROT_WRITE | PROT_EXEC,
	    _MAP_NEW | MAP_PRIVATE | MAP_ANON | MAP_ALIGN, -1L, (off_t)0) != 0)
		thr_panic("initial allocation failed; swap space exhausted?");
	base = (caddr_t)rval.sys_rval1;

	/* Buckets 0..5: one 2K subchunk each, threaded element by element. */
	for (b = 0; b < 6; b++) {
		elsize = (size_t)MINSIZE << b;
		nelem = SUBCHUNKSIZE / elsize;
		cur = base + b * SUBCHUNKSIZE;

		ASSERT(bp[b].free_list == NULL);
		bp[b].free_list = (void *)cur;

		/* link each element to its successor; the last gets NULL */
		for (i = 1; i < nelem; i++) {
			*(void **)cur = (void *)(cur + elsize);
			cur += elsize;
		}
		*(void **)cur = NULL;
	}

	/* Bucket 6: the single 4K buffer that follows the six subchunks. */
	cur = base + b * SUBCHUNKSIZE;
	ASSERT(bp[b].free_list == NULL);
	bp[b].free_list = (void *)cur;

	/* Bucket 7: the single 8K buffer that ends the mapping. */
	cur += 2 * SUBCHUNKSIZE;
	ASSERT(bp[b + 1].free_list == NULL);
	bp[b + 1].free_list = (void *)cur;

	ASSERT((cur - base + 4 * SUBCHUNKSIZE) == BASE_SIZE);
}
/*
 * Translate a request size into a bucket number: 0 for anything up to
 * MINSIZE, otherwise the smallest n such that (MINSIZE << n) >= size.
 * The result is not clamped; callers compare it against NBUCKETS.
 *
 * The high-bit scan is the same as the code in fls_impl(); it is
 * open-coded here for speed.
 */
static int
getbucketnum(size_t size)
{
	int msb = 1;	/* 1-based position of the highest set bit */

	if (size <= MINSIZE)
		return (0);

	/* scan (size - 1) so that exact powers of two do not round up */
	size -= 1;

#ifdef _LP64
	if (size & 0xffffffff00000000ul) {
		msb += 32;
		size >>= 32;
	}
#endif
	if (size & 0xffff0000) {
		msb += 16;
		size >>= 16;
	}
	if (size & 0xff00) {
		msb += 8;
		size >>= 8;
	}
	if (size & 0xf0) {
		msb += 4;
		size >>= 4;
	}
	if (size & 0xc) {
		msb += 2;
		size >>= 2;
	}
	if (size & 0x2) {
		msb += 1;
	}

	ASSERT(msb > MINSHIFT);
	return (msb - MINSHIFT);
}
void *
lmalloc(size_t size)
{
int bucketnum = getbucketnum(size);
ulwp_t *self;
uberdata_t *udp;
bucket_t *bp;
void *ptr;
/*
* ulwp_t structures must be allocated from a rwx mapping since it
* is a normal data object _and_ it contains instructions that are
* executed for user-land DTrace tracing with the fasttrap provider.
*/
int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
/* round size up to the proper power of 2 */
size = (size_t)MINSIZE << bucketnum;
if (bucketnum >= NBUCKETS) {
/* mmap() allocates memory already set to zero */
ptr = mmap((void *)CHUNKSIZE, size, prot,
MAP_PRIVATE|MAP_ANON|MAP_ALIGN, -1, (off_t)0);
if (ptr == MAP_FAILED)
ptr = NULL;
return (ptr);
}
if ((self = __curthread()) == NULL)
udp = &__uberdata;
else
udp = self->ul_uberdata;
if (udp->bucket_init == 0) {
ASSERT(udp->nthreads == 0);
initial_allocation(udp->bucket);
udp->bucket_init = 1;
}
bp = &udp->bucket[bucketnum];
if (self != NULL)
lmutex_lock(&bp->bucket_lock);
if ((ptr = bp->free_list) == NULL) {
size_t bsize;
size_t n;
/*
* Double the number of chunks mmap()ed each time,
* in case of large numbers of allocations.
*/
if (bp->chunks == 0)
bp->chunks = 1;
else
bp->chunks <<= 1;
for (;;) {
bsize = CHUNKSIZE * bp->chunks;
n = bsize / size;
ptr = mmap((void *)CHUNKSIZE, bsize, prot,
MAP_PRIVATE|MAP_ANON|MAP_ALIGN, -1, (off_t)0);
if (ptr != MAP_FAILED)
break;
/* try a smaller chunk allocation */
if ((bp->chunks >>= 1) == 0) {
if (self != NULL)
lmutex_unlock(&bp->bucket_lock);
return (NULL);
}
}
bp->free_list = ptr;
while (--n != 0) {
void *next = (void *)((caddr_t)ptr + size);
*(void **)ptr = next;
ptr = next;
}
*(void **)ptr = NULL;
ptr = bp->free_list;
}
bp->free_list = *(void **)ptr;
if (self != NULL)
lmutex_unlock(&bp->bucket_lock);
/*
* We maintain the free list already zeroed except for the pointer
* stored at the head of the block (mmap() allocates memory already
* set to zero), so all we have to do is zero out the pointer.
*/
*(void **)ptr = NULL;
return (ptr);
}
/*
 * Release a block obtained from lmalloc().
 *
 * 'size' must be the size originally requested (or anything that rounds
 * to the same power of two).  Blocks too large for a bucket are
 * munmap()ed; the rest are zeroed and pushed back on their bucket's free
 * list.  A pointer that is not aligned as lmalloc() would have returned
 * it triggers a thread panic.
 */
void
lfree(void *ptr, size_t size)
{
	int bucketnum = getbucketnum(size);
	int is_mapped = (bucketnum >= NBUCKETS);
	uintptr_t align_mask;
	ulwp_t *self;
	bucket_t *bp;

	/* round size up to the proper power of 2 */
	size = (size_t)MINSIZE << bucketnum;

	/*
	 * Directly mapped blocks are CHUNKSIZE aligned; bucket blocks are
	 * aligned to their own size.  If the low order bits are not all
	 * zero as expected, panic.  This can be caused by an application
	 * calling, for example, pthread_attr_destroy() without having first
	 * called pthread_attr_init(), so that uninitialized data finds its
	 * way down to lfree().
	 */
	align_mask = is_mapped ? (uintptr_t)(CHUNKSIZE - 1) :
	    (uintptr_t)(size - 1);
	if (((uintptr_t)ptr & align_mask) != 0) {
		thr_panic("lfree() called with a misaligned pointer");
		return;
	}

	if (is_mapped) {
		(void) munmap(ptr, size);
		return;
	}

	/*
	 * Zeroing the memory here saves time later when reallocating it.
	 */
	(void) memset(ptr, 0, size);

	/* take the bucket lock only once curthread exists */
	self = __curthread();
	if (self != NULL) {
		bp = &self->ul_uberdata->bucket[bucketnum];
		lmutex_lock(&bp->bucket_lock);
	} else {
		bp = &__uberdata.bucket[bucketnum];
	}

	/* push the block onto the head of the free list */
	*(void **)ptr = bp->free_list;
	bp->free_list = ptr;

	if (self != NULL)
		lmutex_unlock(&bp->bucket_lock);
}
/*
* The following functions can be used internally to libc
* to make memory allocations in the style of malloc()/free()
* (where the size of the allocation is not remembered by the caller)
* but which are safe to use within critical sections, that is,
* sections of code bounded by enter_critical()/exit_critical(),
* lmutex_lock()/lmutex_unlock() or lrw_rdlock()/lrw_wrlock()/lrw_unlock().
*
* These functions must never be used to allocate memory that is
* passed out of libc, for example by strdup(), because it is a
* fatal error to free() an object allocated by libc_malloc().
* Such objects can only be freed by calling libc_free().
*/
/*
 * Size in bytes of the header that precedes every libc_malloc() block:
 * 16 when _LP64 is defined, 8 otherwise.
 */
#ifdef _LP64
#define ALIGNMENT 16
#else
#define ALIGNMENT 8
#endif
/*
 * Header placed in front of the memory returned by libc_malloc() and
 * libc_realloc().  private_size holds the size that was passed to
 * lmalloc() so that libc_free() can pass the same value to lfree();
 * private_align makes the union at least ALIGNMENT bytes long, so the
 * caller's pointer (header + 1) sits that far into the block.
 */
typedef union {
size_t private_size;
char private_align[ALIGNMENT];
} private_header_t;
/*
 * malloc()-style front end to lmalloc(): the block size is remembered in
 * a private_header_t placed just ahead of the returned pointer, so the
 * caller does not have to track it.
 *
 * Returns a pointer to at least 'size' zero-filled bytes, or NULL if the
 * memory cannot be obtained or the request is too large to represent.
 * The result must be released with libc_free(), never free().
 */
void *
libc_malloc(size_t size)
{
	private_header_t *ptr;
	size_t total = size + sizeof (*ptr);
	size_t rounded;

	/* adding the header wrapped around: the request cannot be met */
	if (total < size)
		return (NULL);

	/*
	 * Rounding up to a power of two can itself wrap (to zero) for
	 * requests above half the address space.  Without this check the
	 * caller would be handed a MINSIZE block for a huge request.
	 */
	rounded = (size_t)MINSIZE << getbucketnum(total);
	if (rounded < total)
		return (NULL);

	if ((ptr = lmalloc(rounded)) == NULL)
		return (NULL);
	ptr->private_size = rounded;
	return (ptr + 1);
}
/*
 * realloc()-style companion to libc_malloc().
 *
 * Always allocates a fresh block big enough for 'size' bytes plus the
 * private header, copies over as much of the old contents as fits, and
 * frees the old block.  'old' may be NULL, in which case this behaves
 * like libc_malloc().  Returns NULL, leaving 'old' untouched, if the
 * memory cannot be obtained or the request is too large to represent.
 */
void *
libc_realloc(void *old, size_t size)
{
	private_header_t *ptr;
	void *new;
	size_t total = size + sizeof (*ptr);
	size_t rounded;
	size_t ncopy;

	/* adding the header wrapped around: the request cannot be met */
	if (total < size)
		return (NULL);

	/*
	 * The power-of-two round-up can wrap to zero for requests above
	 * half the address space; that would make the copy length below
	 * underflow, so reject such requests here.
	 */
	rounded = (size_t)MINSIZE << getbucketnum(total);
	if (rounded < total)
		return (NULL);

	if ((ptr = lmalloc(rounded)) == NULL)
		return (NULL);
	ptr->private_size = rounded;
	new = ptr + 1;

	if (old != NULL) {
		ptr = (private_header_t *)old - 1;
		/* copy the smaller of the two payloads (block minus header) */
		ncopy = (rounded < ptr->private_size) ?
		    rounded : ptr->private_size;
		(void) memcpy(new, old, ncopy - sizeof (*ptr));
		lfree(ptr, ptr->private_size);
	}
	return (new);
}
/*
 * Release memory obtained from libc_malloc() or libc_realloc().
 * The block size is recovered from the private header that sits
 * immediately before 'p'.  A NULL pointer is ignored.
 */
void
libc_free(void *p)
{
	private_header_t *hdr;

	if (p == NULL)
		return;

	hdr = (private_header_t *)p - 1;
	lfree(hdr, hdr->private_size);
}
/*
 * Duplicate the NUL-terminated string s1 into memory obtained from
 * libc_malloc().  Returns the copy, or NULL if the allocation fails.
 * The copy must be released with libc_free().
 */
char *
libc_strdup(const char *s1)
{
	size_t nbytes = strlen(s1) + 1;	/* include the terminating NUL */
	char *copy = libc_malloc(nbytes);

	if (copy == NULL)
		return (NULL);

	(void) memcpy(copy, s1, nbytes);
	return (copy);
}