/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <mtmalloc.h>
#include "mtmalloc_impl.h"
#include <unistd.h>
#include <synch.h>
#include <thread.h>
#include <pthread.h>
#include <stdio.h>
#include <limits.h>
#include <errno.h>
#include <string.h>
#include <strings.h>
#include <sys/sysmacros.h>
/*
 * To turn on the asserts, just compile with -DDEBUG.
*/
#ifndef DEBUG
#define NDEBUG
#endif
#include <assert.h>
/*
* The MT hot malloc implementation contained herein is designed to be
* plug-compatible with the libc version of malloc. It is not intended
* to replace that implementation until we decide that it is ok to break
* customer apps (Solaris 3.0).
*
* For requests up to 2^^16, the allocator initializes itself into NCPUS
 * worth of chains of caches. When a memory request is made, the calling thread
 * is vectored into one of NCPUS worth of caches. The LWP id gives us a cheap,
 * contention-reducing index to use; eventually, this should be replaced with
 * the actual CPU sequence number, once an interface to get it is available.
*
 * Once the thread is vectored into one of the cache lists, the real
 * allocation of memory begins. The request size determines which bucket
 * the allocation should be satisfied from. Free buckets are managed via
 * a bitmask: a free bucket is represented by a 1, the first free bit
 * represents the first free bucket, and the position of the bit
 * corresponds to the position of the bucket in the arena.
*
* When the memory from the arena is handed out, the address of the cache
 * control structure is written in the word preceding the returned memory.
* This cache control address is used during free() to mark the buffer free
* in the cache control structure.
*
* When all available memory in a cache has been depleted, a new chunk of memory
* is allocated via sbrk(). The new cache is allocated from this chunk of memory
* and initialized in the function create_cache(). New caches are installed at
* the front of a singly linked list of the same size memory pools. This helps
* to ensure that there will tend to be available memory in the beginning of the
* list.
*
* Long linked lists hurt performance. To decrease this effect, there is a
* tunable, requestsize, that bumps up the sbrk allocation size and thus
* increases the number of available blocks within an arena. We also keep
* a "hint" for each cache list, which is the last cache in the list allocated
* from. This lowers the cost of searching if there are a lot of fully
* allocated blocks at the front of the list.
*
* For requests greater than 2^^16 (oversize allocations), there are two pieces
* of overhead. There is the OVERHEAD used to hold the cache addr
* (&oversize_list), plus an oversize_t structure to further describe the block.
*
* The oversize list is kept as defragmented as possible by coalescing
* freed oversized allocations with adjacent neighbors.
*
* Addresses handed out are stored in a hash table, and are aligned on
 * MTMALLOC_MIN_ALIGN-byte boundaries at both ends. Request sizes are rounded up
* where necessary in order to achieve this. This eases the implementation of
* MTDEBUGPATTERN and MTINITPATTERN, particularly where coalescing occurs.
*
 * A memalign allocation incurs memalign header overhead. There are two
 * types of memalign headers, distinguished by MTMALLOC_MEMALIGN_MAGIC
 * and MTMALLOC_MEMALIGN_MIN_MAGIC. When the amount of memory needed to
 * get from the malloc'ed address to the aligned address is exactly the
 * minimum size OVERHEAD, we create a header occupying only one OVERHEAD
 * slot, marked with the magic number MTMALLOC_MEMALIGN_MIN_MAGIC; we then
 * know that subtracting OVERHEAD from the memaligned address gets us back
 * to the malloc'ed address. Otherwise, we create a memalign header
 * occupying two OVERHEAD slots: one stores the MTMALLOC_MEMALIGN_MAGIC
 * magic number, and the other points back to the malloc'ed address.
*/
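/*
 * Illustrative sketch (not part of the original source): the "cache address
 * in the word preceding the returned memory" scheme described above, shown
 * with hypothetical helper names (example_tag_buffer, example_owning_cache)
 * and with sizeof (void *) standing in for the real OVERHEAD header size.
 */
#if 0	/* sketch only -- not compiled */
static void *
example_tag_buffer(void *raw, void *owning_cache)
{
	/* stash the cache pointer in the word just before the user's buffer */
	*(void **)raw = owning_cache;
	return ((char *)raw + sizeof (void *));
}

static void *
example_owning_cache(void *user_buf)
{
	/* free() walks back one word to find the cache the buffer came from */
	return (*(void **)((char *)user_buf - sizeof (void *)));
}
#endif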
static void add_oversize(oversize_t *);
static void reinit_cpu_list(void);
static void reinit_cache(cache_t *);
static void free_oversize(oversize_t *);
/*
* oversize hash table stuff
*/
#define	ALIGN(x, a)	((((uintptr_t)(x) + ((uintptr_t)(a) - 1)) \
			& ~((uintptr_t)(a) - 1)))
/* need this to deal with the little-endianness of x86 */
#if defined(__i386) || defined(__amd64)
#define	FLIP_EM(x)	htonl((x))	/* byte-swap the freelist word on little-endian */
#else
#define	FLIP_EM(x)	(x)
#endif
#define INSERT_ONLY 0
/* maximum size before overflow */
static int ncpus = 0;
/*
* We require allocations handed out to be aligned on MTMALLOC_MIN_ALIGN-byte
* boundaries. We round up sizeof (oversize_t) (when necessary) to ensure that
* this is achieved.
*/
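/*
 * Worked example of the rounding described above (sketch only, not from the
 * original source): a request is rounded up to the next MTMALLOC_MIN_ALIGN
 * boundary with the usual power-of-two mask arithmetic.
 */
#if 0	/* sketch only -- not compiled */
	/* with an 8-byte minimum alignment, a 13-byte request rounds up to 16 */
	size_t rounded = (13 + (8 - 1)) & ~((size_t)8 - 1);	/* == 16 */
#endif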
/*
 * A memalign header takes two OVERHEAD-sized slots: one for the memalign
 * magic number, and one that points back to the start address of the
 * originally allocated space.
*/
else {\
(uintptr_t)malloc_addr; \
}
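/*
 * Illustrative sketch (not the original code): how a consumer of the two
 * memalign header layouts described above could walk back from an aligned
 * address to the underlying malloc'ed address. The helper name and the
 * assumption that the magic word sits in the slot immediately below the
 * aligned address (with the back-pointer one slot below that) are for
 * illustration only.
 */
#if 0	/* sketch only -- not compiled */
static void *
example_memalign_start(void *aligned_addr, size_t overhead)
{
	uintptr_t magic = *(uintptr_t *)((char *)aligned_addr - overhead);

	if (magic == MTMALLOC_MEMALIGN_MIN_MAGIC) {
		/* one-slot header: the malloc'ed address is one OVERHEAD back */
		return ((char *)aligned_addr - overhead);
	}
	/* two-slot header: the slot below the magic holds the original address */
	return ((void *)*(uintptr_t *)((char *)aligned_addr - 2 * overhead));
}
#endif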
/*
* Add big to the oversize hash table at the head of the relevant bucket.
*/
static void
{
}
void *
{
if (bytes > MAX_CACHED)
}
void *
{
if (bytes == 0) {
return (NULL);
}
/*
 * Optimization possibility:
 * p = malloc(64);
 * q = realloc(p, 64);
 * q can be the same as p.
 * Apply this optimization for the normal-sized
 * caches for now.
*/
return (ptr);
}
return (NULL);
/*
* If new == ptr, ptr has previously been freed. Passing a freed pointer
* to realloc() is not allowed - unless the caller specifically states
* otherwise, in which case we must avoid freeing ptr (ie new) before we
* return new. There is (obviously) no requirement to memcpy() ptr to
* new before we return.
*/
if (!(debugopt & MTDOUBLEFREE))
abort();
return (new);
}
}
return (new);
}
return (new);
}
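/*
 * Illustrative sketch (not the original code) of the realloc() fast path
 * hinted at in the "optimization possibility" comment above: if the buffer
 * came from a normal-sized cache and the new request still fits in that
 * cache's buffer size, the original pointer can simply be returned. The
 * helper name and the mt_size field name are assumptions for illustration.
 */
#if 0	/* sketch only -- not compiled */
static void *
example_realloc_fastpath(cache_t *owning_cache, void *ptr, size_t new_bytes)
{
	if (owning_cache != NULL && new_bytes <= owning_cache->mt_size)
		return (ptr);	/* still fits: reuse the existing buffer */
	return (NULL);		/* caller falls back to allocate, copy, free */
}
#endif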
void *
{
void * ptr;
size = 0;
} else {
/* check for overflow */
return (NULL);
}
}
return (NULL);
return (ptr);
}
void
{
int32_t i;
return;
}
int bucket;
(void) mutex_lock(&oversize_lock);
break;
if (!(debugopt & MTDOUBLEFREE))
abort();
(void) mutex_unlock(&oversize_lock);
return;
}
if (debugopt & MTDEBUGPATTERN)
(void) mutex_unlock(&oversize_lock);
return;
}
/*
* This is the distance measured in bits into the arena.
* The value of offset is in bytes but there is a 1-1 correlation
* between distance into the arena and distance into the
* freelist bitmask.
*/
/*
* i is total number of bits to offset into freelist bitmask.
*/
num_bytes = i >> 3;
/*
* which_bit is the bit offset into the byte in the freelist.
 * If our freelist bitmask looks like 0xf3 and we are freeing
 * block 5 (i.e. the 6th block), our mask will be 0xf7 after
 * the free. Things go left to right; that's why the base mask
 * is 0x80 and not 0x01.
*/
freeblocks += num_bytes;
if (debugopt & MTDEBUGPATTERN)
if (*freeblocks & mask) {
if (!(debugopt & MTDOUBLEFREE))
abort();
} else {
*freeblocks |= mask;
}
}
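/*
 * Illustrative sketch (not the original code) of the bit arithmetic described
 * in the comments above: given a block's bit offset i into the freelist
 * bitmask, the byte to touch is i >> 3 and, because bits are consumed left to
 * right, the mask within that byte is 0x80 >> (i & 7). Freeing block 5 of a
 * byte holding 0xf3 therefore yields 0xf7. The helper name is hypothetical.
 */
#if 0	/* sketch only -- not compiled */
static void
example_mark_block_free(unsigned char *freelist, int i)
{
	unsigned char *byte = freelist + (i >> 3);		/* byte holding the bit */
	unsigned char mask = (unsigned char)(0x80 >> (i & 7));	/* left-to-right order */

	*byte |= mask;						/* 1 == free */
}
#endif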
void *
{
void *alloc_buf;
void *ret_buf;
return (NULL);
}
/* <= MTMALLOC_MIN_ALIGN, malloc can provide directly */
if (alignment <= MTMALLOC_MIN_ALIGN)
return (NULL);
}
/* malloc sets errno */
return (NULL);
/*
* If alloc_size > MAX_CACHED, malloc() will have returned a multiple of
* MTMALLOC_MIN_ALIGN, having rounded-up alloc_size if necessary. Since
* we will use alloc_size to return the excess fragments to the free
* list, we also round-up alloc_size if necessary.
*/
if ((alloc_size > MAX_CACHED) &&
/* aligned correctly */
/*
* If the leftover piece of the memory > MAX_CACHED,
* split off the piece and return it back to the freelist.
*/
}
} else {
/* needs to be aligned */
if (alloc_size <= MAX_CACHED) {
return (ret_buf);
}
/*
 * Only check for the fragments when the memory is allocated
* from oversize_list. Split off a fragment and return it
* to the oversize freelist when it's > MAX_CACHED.
*/
tail_sz = alloc_size -
switch (oversize_bits) {
case NONE_OVERSIZE:
case DATA_OVERSIZE:
break;
case HEAD_OVERSIZE:
/*
 * If we can extend data to > MAX_CACHED and the
 * head is still > MAX_CACHED, we split the head
 * end as in the head-and-data-oversized case;
 * otherwise we just create a memalign header.
*/
break;
} else {
taddr);
}
/* FALLTHROUGH */
case HEAD_AND_DATA_OVERSIZE:
/*
* Split off the head fragment and
* return it back to oversize freelist.
* Create oversize header for the piece
* of (data + tail fragment).
*/
(void) mutex_lock(&oversize_lock);
(void) mutex_unlock(&oversize_lock);
/* free up the head fragment */
break;
case TAIL_OVERSIZE:
/*
 * If we can extend data to > MAX_CACHED and the
 * tail end is still > MAX_CACHED, we split the
 * tail end; otherwise we just create a memalign header.
*/
break;
} else {
}
/* FALLTHROUGH */
case DATA_AND_TAIL_OVERSIZE:
/*
* Split off the tail fragment and
* return it back to oversize freelist.
* Create memalign header and adjust
* the size for the piece of
* (head fragment + data).
*/
break;
case HEAD_AND_TAIL_OVERSIZE:
/*
 * Split off the head fragment.
 * We try to free up the tail end when we can
 * extend the data size to (MAX_CACHED + 8)
 * and keep the tail end oversized.
 * The bottom line is that every split piece
 * must itself be oversize.
*/
/*
* If the chunk is not big enough
* to make both data and tail oversize
* we just keep them as one piece.
*/
(void) mutex_lock(&oversize_lock);
(void) mutex_unlock(&oversize_lock);
break;
} else {
/*
 * Extend the data size to > MAX_CACHED
 * and handle it as if head, data, and
 * tail were all oversized.
*/
}
/* FALLTHROUGH */
case ALL_OVERSIZE:
/*
 * Split off the head and tail fragments and
 * return them to the oversize freelist.
 * Allocate an oversize header for the data segment.
*/
/* create oversize header for data seg */
(void) mutex_lock(&oversize_lock);
(void) mutex_unlock(&oversize_lock);
/* create oversize header for tail fragment */
break;
default:
/* should not reach here */
assert(0);
}
}
return (ret_buf);
}
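/*
 * Illustrative sketch (not the original code) of how the oversize_bits value
 * driving the switch above could be classified. The OR-able flag values are
 * assumptions for illustration; the real code also accounts for header
 * overhead when deciding whether a head or tail fragment is worth splitting.
 */
#if 0	/* sketch only -- not compiled */
#define	EX_HEAD_BIT	0x1
#define	EX_DATA_BIT	0x2
#define	EX_TAIL_BIT	0x4

static int
example_classify_fragments(size_t head_sz, size_t data_sz, size_t tail_sz)
{
	int bits = 0;	/* 0 == nothing oversize */

	/* a fragment is only worth splitting off if it is itself oversize */
	if (head_sz > MAX_CACHED)
		bits |= EX_HEAD_BIT;
	if (data_sz > MAX_CACHED)
		bits |= EX_DATA_BIT;
	if (tail_sz > MAX_CACHED)
		bits |= EX_TAIL_BIT;
	return (bits);
}
#endif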
void *
{
static unsigned pagesize;
if (size == 0)
return (NULL);
if (!pagesize)
}
void
{
switch (cmd) {
case MTDEBUGPATTERN:
/*
* Reinitialize free blocks in case malloc() is called prior
* to mallocctl().
*/
reinit++;
}
/*FALLTHRU*/
case MTDOUBLEFREE:
case MTINITBUFFER:
if (value)
else
break;
case MTCHUNKSIZE:
requestsize = value;
break;
default:
break;
}
}
/*
* Initialization function, called from the init section of the library.
* No locking is required here because we are single-threaded during
* library initialization.
*/
static void
setup_caches(void)
{
uint_t i, j;
/*
* Get a decent "current cpu identifier", to be used to reduce
 * contention. Eventually, this should be replaced by an interface
 * to obtain the actual CPU sequence number.
*/
/* round ncpus up to a power of 2 */
ncpus++;
/*
* We now do some magic with the brk. What we want to get in the
* end is a bunch of well-aligned stuff in a big initial allocation.
* Along the way, we do sanity checks to make sure no one else has
* touched the brk (which shouldn't happen, but it's always good to
* check)
*
* First, make sure sbrk is sane, and store the current brk in oldbrk.
*/
if ((void *)oldbrk == (void *)-1)
abort(); /* sbrk is broken -- we're doomed. */
/*
* Now, align the brk to a multiple of CACHE_COHERENCY_UNIT, so that
* the percpu structures and cache lists will be properly aligned.
*
* 2. All hunks will be page-aligned, assuming HUNKSIZE >= PAGESIZE,
* so they can be paged out individually.
*/
abort(); /* sbrk is broken -- we're doomed. */
/*
* For each cpu, there is one percpu_t and a list of caches
*/
abort(); /* sbrk is broken -- we're doomed. */
/*
* Finally, align the brk to HUNKSIZE so that all hunks are
* page-aligned, to avoid edge-effects.
*/
abort(); /* sbrk is broken -- we're doomed. */
/* initialize the percpu list */
for (i = 0; i < ncpus; i++) {
for (j = 0; j < NUM_CACHES; j++) {
}
USYNC_THREAD, NULL);
/* get the correct cache list alignment */
}
/*
* Initialize oversize listhead
*/
/*
* Now install the global variables.
*/
curcpu = new_curcpu;
}
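/*
 * Illustrative sketch (not the original code) of the "round ncpus up to a
 * power of 2" step in setup_caches() above; a power-of-two count lets the
 * per-CPU index be computed with a cheap mask instead of a modulo.
 */
#if 0	/* sketch only -- not compiled */
	int n = 7;			/* e.g. the CPU count reported by sysconf() */

	while (n & (n - 1))		/* non-zero while n is not a power of two */
		n++;			/* 7 -> 8 */
	/* with a power-of-two count, the per-CPU index is just (id & (n - 1)) */
#endif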
static void
{
long nblocks;
/*
* rough calculation. We will need to adjust later.
*/
nblocks >>= 3;
if (nblocks == 0) { /* less than 8 free blocks in this pool */
while (i > sub) {
numblocks++;
i -= sub;
}
while (numblocks--) {
}
} else {
nblocks, 32);
/* recompute nblocks */
/* Set everything to free */
}
if (debugopt & MTDEBUGPATTERN)
}
static void
reinit_cpu_list(void)
{
/* Reinitialize free oversize blocks. */
(void) mutex_lock(&oversize_lock);
if (debugopt & MTDEBUGPATTERN)
(void) mutex_unlock(&oversize_lock);
/* Reinitialize free blocks. */
(void) mutex_unlock(
continue;
}
}
}
}
reinit = 0;
}
static void
{
int32_t i, n;
if (*freeblocks & 0xffffffff) {
for (i = 0; i < 32; i++) {
n = (uintptr_t)(((freeblocks -
}
}
}
freeblocks++;
}
}
static void *
{
logsz++;
/*
* Find a cache of the appropriate size with free buffers.
*
* We don't need to lock each cache as we check their mt_nfree count,
* since:
* 1. We are only looking for caches with mt_nfree > 0. If a
* free happens during our search, it will increment mt_nfree,
 * which will not affect the test.
* 2. Allocations can decrement mt_nfree, but they can't happen
* as long as we hold mt_parent_lock.
*/
/* Search through the list, starting at the mt_hint */
/* wrap around -- search up to the hint */
}
return (NULL);
}
/* link in the new block at the beginning of the list */
}
/* update the hint to the cache we found or created */
/* thiscache now points to a cache with available space */
if (*freeblocks & 0xffffffff)
break;
freeblocks++;
*freeblocks & 0xffffffff)
break;
freeblocks++;
*freeblocks & 0xffffffff)
break;
freeblocks++;
*freeblocks & 0xffffffff)
break;
freeblocks++;
}
/*
* the offset from mt_freelist to freeblocks is the offset into
* the arena. Be sure to include the offset into freeblocks
* of the bitmask. n is the offset.
*/
for (i = 0; i < 32; ) {
break;
break;
break;
break;
}
index = 0x80000000 >> --i;
/*
 * Now that the offset is in n and the free bit has been cleared
 * in the freelist, all that is left is to find the block in the
 * arena, put the value of thiscache in the word ahead of the
 * handed-out address, and return the memory to the user.
*/
/* Store the cache addr for this buf. Makes free go fast. */
/*
* This assert makes sure we don't hand out memory that is not
* owned by this cache.
*/
abort(); /* reference after free */
if (debugopt & MTINITBUFFER)
return ((void *)ret);
}
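/*
 * Illustrative sketch (not the original code) of the word-at-a-time freelist
 * scan above: skip 32 buckets at a time while a word is zero, then peel off
 * the first set bit from the most significant end, which is why the selected
 * mask is 0x80000000 >> i rather than 1 << i. The helper name is hypothetical
 * and the endianness fix-up (FLIP_EM) is omitted for clarity.
 */
#if 0	/* sketch only -- not compiled */
static int
example_claim_first_free(uint32_t *freeblocks, int nwords)
{
	int w, i;

	for (w = 0; w < nwords; w++) {
		if (freeblocks[w] == 0)
			continue;			/* all 32 buckets in use */
		for (i = 0; i < 32; i++) {
			uint32_t mask = 0x80000000U >> i;

			if (freeblocks[w] & mask) {
				freeblocks[w] &= ~mask;	/* claim the bucket */
				return (w * 32 + i);	/* bucket index in arena */
			}
		}
	}
	return (-1);	/* this cache is exhausted */
}
#endif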
static void *
{
void * ret;
/*
* The request size is too big. We need to do this in
* chunks. Sbrk only takes an int for an arg.
*/
return ((void *)-1);
while (wad > 0) {
return ((void *)-1);
}
}
} else
return (ret);
}
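/*
 * Illustrative sketch (not the original code) of the chunked sbrk() growth
 * described in the comment above: when the total request ("wad") is larger
 * than a single sbrk() increment can express, grow the heap in LONG_MAX-sized
 * pieces and report failure if any piece fails. The helper name and the
 * LONG_MAX chunk limit are assumptions for illustration.
 */
#if 0	/* sketch only -- not compiled */
static void *
example_chunked_sbrk(size_t wad)
{
	void *base = sbrk(0);		/* the new memory will start at the old brk */

	while (wad > 0) {
		size_t chunk = (wad > LONG_MAX) ? LONG_MAX : wad;

		if (sbrk((intptr_t)chunk) == (void *)-1)
			return ((void *)-1);	/* propagate the sbrk failure */
		wad -= chunk;
	}
	return (base);
}
#endif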
static void *
{
/* make sure we will not overflow */
if (size > MAX_MTMALLOC) {
return (NULL);
}
/*
* Since we ensure every address we hand back is
* MTMALLOC_MIN_ALIGN-byte aligned, ALIGNing size ensures that the
* memory handed out is MTMALLOC_MIN_ALIGN-byte aligned at both ends.
* This eases the implementation of MTDEBUGPATTERN and MTINITPATTERN,
* particularly where coalescing occurs.
*/
/*
 * The idea with the global lock is that we are sure to
 * block in the kernel anyway, since an oversize alloc
 * always has to call morecore().
*/
(void) mutex_lock(&oversize_lock);
abort(); /* reference after free */
} else {
/* Get more 8-byte aligned memory from heap */
(void) mutex_unlock(&oversize_lock);
return (NULL);
}
}
if (debugopt & MTINITBUFFER)
(void) mutex_unlock(&oversize_lock);
return ((void *)ret);
}
static void
{
/* locate correct insertion point in size-ordered list */
;
/* link into size-ordered list */
/*
* link item into address-ordered list
* (caller provides insertion point as an optimization)
*/
}
static void
{
/* unlink from address list */
/* unlink from size list */
}
static void
{
/* unlink from size list */
/* locate correct insertion point in size-ordered list */
;
/* link into size-ordered list */
}
}
static void
{
/*
* Locate insertion point in address-ordered list
*/
;
/*
* Determine how to add chunk to oversize freelist
*/
/* Check for adjacency with left chunk */
}
/* Check for adjacency with right chunk */
}
}
/*
* If MTDEBUGPATTERN==1, lp->addr will have been overwritten with
* FREEPATTERN for lp->size bytes. If we can merge, the oversize
 * header(s) that will also become part of the memory available for
 * reallocation will need to be overwritten with FREEPATTERN, or we
 * will SIGABRT when this memory is next reallocated.
*/
switch (merge_flags) {
case INSERT_ONLY: /* Coalescing not possible */
break;
case COALESCE_LEFT:
if (debugopt & MTDEBUGPATTERN)
break;
case COALESCE_RIGHT:
if (debugopt & MTDEBUGPATTERN)
break;
case COALESCE_WITH_BOTH_SIDES: /* Merge (with right) to the left */
if (debugopt & MTDEBUGPATTERN) {
}
break;
}
}
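/*
 * Illustrative sketch (not the original code) of the adjacency tests behind
 * the COALESCE_* cases above, assuming the COALESCE_* values are OR-able
 * flags and using a simplified layout in which a free chunk spans exactly
 * [addr, addr + size); the real code must also account for the oversize_t
 * header and OVERHEAD word that sit in front of each block. lp and np are
 * the address-ordered left and right neighbors.
 */
#if 0	/* sketch only -- not compiled */
static int
example_merge_flags(oversize_t *lp, oversize_t *np, oversize_t *big)
{
	int flags = INSERT_ONLY;

	if (lp != NULL && (caddr_t)lp->addr + lp->size == (caddr_t)big->addr)
		flags |= COALESCE_LEFT;		/* left neighbor touches us */
	if (np != NULL && (caddr_t)big->addr + big->size == (caddr_t)np->addr)
		flags |= COALESCE_RIGHT;	/* we touch the right neighbor */
	/* both set -> the "merge (with right) to the left" case */
	return (flags);
}
#endif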
/*
* Find memory on our list that is at least size big. If we find a block that is
* big enough, we break it up and return the associated oversize_t struct back
* to the calling client. Any leftover piece of that block is returned to the
* freelist.
*/
static oversize_t *
{
return (NULL);
/* breaking up a chunk of memory */
> MAX_CACHED) {
abort();
} else {
}
return (wp);
}
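/*
 * Illustrative sketch (not the original code) of the split decision described
 * above: a block found on the oversize freelist is only broken up when the
 * leftover piece can hold an oversize_t header and still remain larger than
 * MAX_CACHED, so every piece stays oversize. The helper name and the exact
 * header accounting are assumptions for illustration.
 */
#if 0	/* sketch only -- not compiled */
static int
example_should_split(size_t block_size, size_t request)
{
	if (block_size < request + sizeof (oversize_t))
		return (0);	/* nothing meaningful would be left over */
	return ((block_size - request - sizeof (oversize_t)) > MAX_CACHED);
}
#endif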
static void
{
buf += 4;
}
}
static void *
{
return (buf);
return (NULL);
}
static void
{
(void) mutex_lock(&oversize_lock);
(void) mutex_unlock(&oversize_lock);
}
static oversize_t *
{
return (ovsz_hdr);
}
static void
{
(void) mutex_lock(&oversize_lock);
cachehead++) {
(void) mutex_lock(
}
}
}
}
static void
{
cachehead--) {
(void) mutex_unlock(
}
}
}
(void) mutex_unlock(&oversize_lock);
}
#pragma init(malloc_init)
static void
malloc_init(void)
{
/*
* This works in the init section for this library
* because setup_caches() doesn't call anything in libc
* that calls malloc(). If it did, disaster would ensue.
*
* For this to work properly, this library must be the first
* one to have its init section called (after libc) by the
* dynamic linker. If some other library's init section
* ran first and called malloc(), disaster would ensue.
* Because this is an interposer library for malloc(), the
* dynamic linker arranges for its init section to run first.
*/
(void) setup_caches();
}