libmtmalloc/common/mtmalloc.c

2N/A/*
2N/A * CDDL HEADER START
2N/A *
2N/A * The contents of this file are subject to the terms of the
2N/A * Common Development and Distribution License (the "License").
2N/A * You may not use this file except in compliance with the License.
2N/A *
2N/A * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
2N/A * or http://www.opensolaris.org/os/licensing.
2N/A * See the License for the specific language governing permissions
2N/A * and limitations under the License.
2N/A *
2N/A * When distributing Covered Code, include this CDDL HEADER in each
2N/A * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
2N/A * If applicable, add the following below this CDDL HEADER, with the
2N/A * fields enclosed by brackets "[]" replaced with your own identifying
2N/A * information: Portions Copyright [yyyy] [name of copyright owner]
2N/A *
2N/A * CDDL HEADER END
2N/A */
2N/A
2N/A/*
2N/A * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
2N/A */
2N/A
2N/A/* LINTLIBRARY */
2N/A
2N/A#include <mtmalloc.h>
2N/A#include "mtmalloc_impl.h"
2N/A#include <unistd.h>
2N/A#include <synch.h>
2N/A#include <thread.h>
2N/A#include <pthread.h>
2N/A#include <stdio.h>
2N/A#include <limits.h>
2N/A#include <errno.h>
2N/A#include <string.h>
2N/A#include <strings.h>
2N/A#include <sys/param.h>
2N/A#include <sys/sysmacros.h>
2N/A#include <sys/types.h>
2N/A#include <sys/mman.h>
2N/A#include <atomic.h>
2N/A#include <stdlib.h>
2N/A#define REINIT(v)   ((v) & 0x1) /* if odd reinit is active */
2N/A
2N/A/*
2N/A * To turn on the asserts just compile -DDEBUG
2N/A */
2N/A
2N/A#ifndef DEBUG
2N/A#define NDEBUG
2N/A#endif
2N/A
2N/A#include <assert.h>
2N/A
2N/A/*
2N/A * The MT hot malloc implementation contained herein is designed to be
2N/A * plug-compatible with the libc version of malloc. It is not intended
2N/A * to replace that implementation until we decide that it is ok to break
2N/A * customer apps (Solaris 3.0).
2N/A *
2N/A * For requests up to 2^^16, the allocator initializes itself into 4*NCPUS
2N/A * worth of chains of caches. When a memory request is made, and
2N/A * MTEXCLUSIVE is not enabled the calling thread is vectored into one
2N/A * of 2*NCPUS worth of caches. The LWP id gives us a cheap,
2N/A * contention-reducing index to use. If MTEXCLUSIVE has been enabled,
2N/A * and the thread id is less than or equal to 2*NCPUS, then the request
2N/A * is routed to an exclusive bucket based on thread id. If the thread id is
2N/A * > 2*NCPUS, then it is routed as if MTEXCLUSIVE is not enabled.
2N/A *
2N/A * Once the thread is vectored into one of the list of caches the real
2N/A * allocation of the memory begins. The size is determined to figure out which
2N/A * bucket the allocation should be satisfied from. The management of free
2N/A * buckets is done via a bitmask. A free bucket is represented by a 1. The
2N/A * first free bit represents the first free bucket. The position of the bit,
2N/A * represents the position of the bucket in the arena.
2N/A *
2N/A * When the memory from the arena is handed out, the address of the cache
2N/A * control structure is written in the word preceding the returned memory.
2N/A * This cache control address is used during free() to mark the buffer free
2N/A * in the cache control structure.
2N/A *
2N/A * When all available memory in a cache has been depleted, a new chunk of memory
2N/A * is allocated via sbrk(). The new cache is allocated from this chunk of memory
2N/A * and initialized in the function create_cache(). New caches are installed at
2N/A * into the last array, and when that array is filled a new cachespaceblock
2N/A * is created and the cache installed at the beginning of the new array.
2N/A * The new cachespaceblock is installed at the front of the singly
2N/A * linked list of cachespaceblocks for the same size memory pools.
2N/A * This helps to ensure that there will tend to be available memory
2N/A * in the beginning of the list.
2N/A *
2N/A * Long linked lists hurt performance. To decrease this effect, there is a
2N/A * tunable, requestsize, that bumps up the sbrk allocation size and thus
2N/A * increases the number of available blocks within an arena.  We also keep
2N/A * a "hint" for each cachespaceblock , which is the last cachespaceblock
2N/A * in the list allocated from. This lowers the cost of searching if there
2N/A * are a lot of fully allocated cachespaceblock at the front of the list.
2N/A *
2N/A * For requests greater than 2^^16 (oversize allocations), there are two pieces
2N/A * of overhead. There is the OVERHEAD used to hold the cache addr
2N/A * (&oversize_list), plus an oversize_t structure to further describe the block.
2N/A * This Oversize Allocation trigger is tunable. MTMAXCACHE can be set
2N/A * in the environment variable MTMALLOC_OPTIONS to 16,17,18,19,20,or 21.
2N/A * This results in using the buckets for all allocations less than
2N/A * 2^^MTMAXCACHE.
2N/A *
2N/A * The oversize list is kept as defragmented as possible by coalescing
2N/A * freed oversized allocations with adjacent neighbors.
2N/A *
2N/A * Addresses handed out are stored in a hash table, and are aligned on
2N/A * MTMALLOC_MIN_ALIGN-byte boundaries at both ends. Request sizes are rounded-up
2N/A * where necessary in order to achieve this. This eases the implementation of
2N/A * MTDEBUGPATTERN and MTINITPATTERN, particularly where coalescing occurs.
2N/A *
2N/A * A memalign allocation takes memalign header overhead.  There's two
2N/A * types of memalign headers distinguished by MTMALLOC_MEMALIGN_MAGIC
2N/A * and MTMALLOC_MEMALIGN_MIN_MAGIC.  When the size of memory taken to
2N/A * get to the aligned address from malloc'ed address is the minimum size
2N/A * OVERHEAD, we create a header taking only one OVERHEAD space with magic
2N/A * number MTMALLOC_MEMALIGN_MIN_MAGIC, and we know by subtracting OVERHEAD
2N/A * from memaligned address, we can get to the malloc'ed address. Otherwise,
2N/A * we create a memalign header taking two OVERHEAD space, one stores
2N/A * MTMALLOC_MEMALIGN_MAGIC magic number, the other one points back to the
2N/A * malloc'ed address.
2N/A */
2N/A
2N/Astatic int setup_caches(void);
2N/Astatic void * morecore(size_t);
2N/Astatic void create_cache(cache_t *, size_t bufsize,
2N/A    uint_t hunks, volatile uint_t *cs);
2N/A/*
2N/A * The additional parameter for malloc_internal
2N/A * is for a flag controlling the use of the parent lock.
2N/A */
2N/Astatic void *malloc_internal(size_t, percpu_t *, uint_t);
2N/Astatic void *oversize(size_t);
2N/Astatic oversize_t *find_oversize(size_t);
2N/Astatic void add_oversize(oversize_t *);
2N/Astatic void copy_pattern(uint32_t, void *, size_t);
2N/Astatic void * verify_pattern(uint32_t, void *, size_t);
2N/Astatic void reinit_cpu_list(void);
2N/Astatic void reinit_cache(cache_t *);
2N/Astatic void free_oversize(oversize_t *, int);
2N/Astatic oversize_t *oversize_header_alloc(uintptr_t, size_t);
2N/A
2N/A/*
2N/A * oversize hash table stuff
2N/A */
2N/A#define NUM_BUCKETS 67  /* must be prime */
2N/A#define HASH_OVERSIZE(caddr)    ((uintptr_t)(caddr) % NUM_BUCKETS)
2N/Astatic oversize_t *ovsz_hashtab[NUM_BUCKETS];
2N/A
2N/A#define ALIGN(x, a) ((((uintptr_t)(x) + ((uintptr_t)(a) - 1))\
2N/A        & ~((uintptr_t)(a) - 1)))
2N/A/*
2N/A * Gets a decent "current cpu identifier", to be used to reduce contention.
2N/A * Eventually, this should be replaced by an interface to get the actual
2N/A * CPU sequence number in libthread/liblwp.
2N/A */
2N/A#define get_curcpu_func() (curcpu_func)thr_self
2N/A
2N/A#define FREEMASK_BITS           (NBBY * sizeof (ulong_t))
2N/A#define INSERT_ONLY         0
2N/A#define COALESCE_LEFT           0x00000001
2N/A#define COALESCE_RIGHT          0x00000002
2N/A#define COALESCE_WITH_BOTH_SIDES    (COALESCE_LEFT | COALESCE_RIGHT)
2N/A
2N/A#define OVERHEAD    8   /* size needed to write cache addr */
2N/A#define HUNKSIZE    8192    /* just a multiplier */
2N/A#define MBUFSZ      512 /* MTMALLOC_OPTIONS maximim size */
2N/A#define TRUE        1
2N/A#define FALSE       0
2N/A
2N/Astatic int MAX_CACHED_SHIFT = 16;   /* 64K is the max cached size */
2N/Astatic int MAX_CACHED;      /* (1 << MAX_CACHED_SHIFT) */
2N/A#define MIN_CACHED_SHIFT    4   /* smaller requests rounded up */
2N/A#define MTMALLOC_MIN_ALIGN  8   /* min guaranteed alignment */
2N/A
2N/A/* maximum size before overflow */
2N/A#define MAX_MTMALLOC    (SIZE_MAX - (SIZE_MAX % MTMALLOC_MIN_ALIGN) \
2N/A            - OVSZ_HEADER_SIZE)
2N/A
2N/Astatic int NUM_CACHES;   /* (MAX_CACHED_SHIFT - MIN_CACHED_SHIFT + 1) */
2N/Astatic int CACHELIST_SIZE;
2N/A/*
2N/A * ALIGN(NUM_CACHES *
2N/A * sizeof (cache_head_t),CACHE_COHERENCY_UNIT)
2N/A */
2N/Astatic ulong_t MTPAGESIZE;
2N/A#ifdef _LP64
2N/Astatic int MINSIZE = 64; /* for requestsize, tunable */
2N/A#else
2N/Astatic int MINSIZE = 9; /* for requestsize, tunable */
2N/A#endif
2N/A
2N/A#define MAXSIZE     512 /* arbitrary, big enough, for requestsize */
2N/A
2N/A#define FREEPATTERN 0xdeadbeef /* debug fill pattern for free buf */
2N/A#define INITPATTERN 0xbaddcafe /* debug fill pattern for new buf */
2N/A
2N/A#define misaligned(p)   ((unsigned)(p) & (sizeof (int) - 1))
2N/A#define IS_OVERSIZE(x, y)   (((x) < (y)) && (((x) > MAX_CACHED)? 1 : 0))
2N/A
2N/Astatic long requestsize; /* 9 pages per cache; tunable; 9 is min */
2N/A
2N/Astatic uint_t cpu_mask;
2N/Astatic curcpu_func curcpu;
2N/A
2N/Astatic volatile int32_t debugopt = 0;
2N/Astatic volatile uint32_t reinit = 0;
2N/A
2N/Astatic percpu_t *cpu_list;
2N/Astatic percpu_t *exclusive_list; /* points into the top half of cpu_list */
2N/Astatic oversize_t oversize_list = {
2N/A    &oversize_list, &oversize_list,
2N/A    &oversize_list, &oversize_list,
2N/A    NULL,
2N/A    0           /* sentinal */
2N/A};
2N/Astatic mutex_t oversize_lock = DEFAULTMUTEX;
2N/A
2N/Astatic int ncpus = 0;
2N/A
2N/A/*
2N/A * lockfree_buckets buckets are created in the top half of the cpu_list
2N/A * and do not use  the parent lock. By default lockfree_threshold is 0
2N/A * and no lwps get an exclusive bucket. When MTEXCLSIVE is set via
2N/A * mallocctl or the environment variable option, then lockfree_threshold
2N/A * is set to lockfree_buckets and lwps with threadid <=lockfree_threshold
2N/A * get an exclusive bucket.
2N/A */
2N/Astatic int lockfree_buckets = 0;
2N/A
2N/Astatic volatile uint_t lockfree_threshold = 0;
2N/A/*
2N/A * if do_realfree != 0 call madvise for all allocations >
2N/A * do_realfree*pagesize
2N/A * do_realfree must be > 1
2N/A */
2N/Astatic ulong_t do_realfree  = 0;
2N/A
2N/A#define MTMALLOC_OVERSIZE_MAGIC     ((uintptr_t)&oversize_list)
2N/A#define MTMALLOC_MEMALIGN_MAGIC     ((uintptr_t)&oversize_list + 1)
2N/A#define MTMALLOC_MEMALIGN_MIN_MAGIC ((uintptr_t)&oversize_list + 2)
2N/A
2N/A/*
2N/A * We require allocations handed out to be aligned on MTMALLOC_MIN_ALIGN-byte
2N/A * boundaries. We round up sizeof (oversize_t) (when necessary) to ensure that
2N/A * this is achieved.
2N/A */
2N/A#define OVSZ_SIZE       (ALIGN(sizeof (oversize_t), MTMALLOC_MIN_ALIGN))
2N/A#define OVSZ_HEADER_SIZE    (OVSZ_SIZE + OVERHEAD)
2N/A
2N/A/*
2N/A * memalign header takes 2 OVERHEAD space.  One for memalign magic, and the
2N/A * other one points back to the start address of originally allocated space.
2N/A */
2N/A#define MEMALIGN_HEADER_SIZE    2 * OVERHEAD
2N/A#define MEMALIGN_HEADER_ALLOC(x, shift, malloc_addr)\
2N/A    if (shift == OVERHEAD)\
2N/A        *((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
2N/A            MTMALLOC_MEMALIGN_MIN_MAGIC; \
2N/A    else {\
2N/A        *((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
2N/A            MTMALLOC_MEMALIGN_MAGIC; \
2N/A        *((uintptr_t *)((caddr_t)x - 2 * OVERHEAD)) = \
2N/A            (uintptr_t)malloc_addr; \
2N/A    }
2N/A
2N/A/*
2N/A * Add big to the oversize hash table at the head of the relevant bucket.
2N/A */
2N/Astatic void
2N/Ainsert_hash(oversize_t *big)
2N/A{
2N/A    caddr_t ret = big->addr;
2N/A    int bucket = HASH_OVERSIZE(ret);
2N/A
2N/A    assert(MUTEX_HELD(&oversize_lock));
2N/A    big->hash_next = ovsz_hashtab[bucket];
2N/A    ovsz_hashtab[bucket] = big;
2N/A}
2N/A
2N/Astatic int
2N/Aremove_hash(oversize_t *big)
2N/A{
2N/A    oversize_t **opp;
2N/A    int bucket = HASH_OVERSIZE(big->addr);
2N/A
2N/A    assert(MUTEX_HELD(&oversize_lock));
2N/A    for (opp = &ovsz_hashtab[bucket]; *opp != NULL;
2N/A        opp = &(*opp)->hash_next) {
2N/A        if (*opp == big)
2N/A            break;
2N/A    }
2N/A    if (*opp == NULL)
2N/A        return (-1);
2N/A
2N/A    *opp = big->hash_next;  /* remove big from the hash table */
2N/A    big->hash_next = NULL;
2N/A    return (0);
2N/A}
2N/A
2N/Avoid *
2N/Amalloc(size_t bytes)
2N/A{
2N/A    uint_t this_lwp; /* the current thread id */
2N/A
2N/A    if (bytes > MAX_CACHED) {
2N/A        /*
2N/A         * the following change was made to improve DTrace
2N/A         * tracability, without it tail call elimination
2N/A         * makes it impossible to trace on the return from
2N/A         * oversize
2N/A         */
2N/A        void *ret_ptr = oversize(bytes);
2N/A        return (ret_ptr);
2N/A    }
2N/A    /*
2N/A     * If setup_caches fails, we set ENOMEM and return NULL
2N/A     */
2N/A    if (cpu_list == (percpu_t *)NULL) {
2N/A        if (setup_caches() == 0) {
2N/A            errno = ENOMEM;
2N/A            return (NULL);
2N/A        }
2N/A    }
2N/A
2N/A    this_lwp = curcpu();
2N/A    if (this_lwp <= lockfree_threshold) {
2N/A        return (malloc_internal(bytes,
2N/A            &exclusive_list[this_lwp - 1], FALSE));
2N/A    }
2N/A    return (malloc_internal(bytes, &cpu_list[(this_lwp & cpu_mask)],
2N/A        TRUE));
2N/A
2N/A}
2N/A
2N/Avoid *
2N/Arealloc(void * ptr, size_t bytes)
2N/A{
2N/A    void *new, *data_ptr;
2N/A    cache_t *cacheptr;
2N/A    caddr_t mem;
2N/A    size_t shift = 0;
2N/A
2N/A    if (bytes == 0) {
2N/A        free(ptr);
2N/A        return ((void *)0x00000001);
2N/A    }
2N/A
2N/A    if (ptr <= (void *)0x00000001)
2N/A        return (malloc(bytes));
2N/A
2N/A    data_ptr = ptr;
2N/A    mem = (caddr_t)ptr - OVERHEAD;
2N/A
2N/A    /*
2N/A     * Optimization possibility :
2N/A     *  p = malloc(64);
2N/A     *  q = realloc(p, 64);
2N/A     * q can be same as p.
2N/A     * Apply this optimization for the normal
2N/A     * sized caches for now.
2N/A     */
2N/A    if (*(uintptr_t *)mem < MTMALLOC_OVERSIZE_MAGIC ||
2N/A        *(uintptr_t *)mem > MTMALLOC_MEMALIGN_MIN_MAGIC) {
2N/A        cacheptr = (cache_t *)*(uintptr_t *)mem;
2N/A        if (bytes <= (cacheptr->mt_size - OVERHEAD))
2N/A            return (ptr);
2N/A    }
2N/A
2N/A    new = malloc(bytes);
2N/A
2N/A    if (new == NULL)
2N/A        return (NULL);
2N/A
2N/A    /*
2N/A     * If new == ptr, ptr has previously been freed. Passing a freed pointer
2N/A     * to realloc() is not allowed - unless the caller specifically states
2N/A     * otherwise, in which case we must avoid freeing ptr (ie new) before we
2N/A     * return new. There is (obviously) no requirement to memcpy() ptr to
2N/A     * new before we return.
2N/A     */
2N/A    if (new == ptr) {
2N/A        if (!(debugopt & MTDOUBLEFREE))
2N/A            abort();
2N/A        return (new);
2N/A    }
2N/A
2N/A    if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
2N/A        mem -= OVERHEAD;
2N/A        ptr = (void *)*(uintptr_t *)mem;
2N/A        mem = (caddr_t)ptr - OVERHEAD;
2N/A        shift = (size_t)((uintptr_t)data_ptr - (uintptr_t)ptr);
2N/A    } else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
2N/A        ptr = (void *) mem;
2N/A        mem -= OVERHEAD;
2N/A        shift = OVERHEAD;
2N/A    }
2N/A
2N/A    if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
2N/A        oversize_t *old;
2N/A
2N/A        old = (oversize_t *)(mem - OVSZ_SIZE);
2N/A        (void) memcpy(new, data_ptr, MIN(bytes, old->size - shift));
2N/A        free(ptr);
2N/A        return (new);
2N/A    }
2N/A
2N/A    cacheptr = (cache_t *)*(uintptr_t *)mem;
2N/A
2N/A    (void) memcpy(new, data_ptr,
2N/A        MIN(cacheptr->mt_size - OVERHEAD - shift, bytes));
2N/A    free(ptr);
2N/A
2N/A    return (new);
2N/A}
2N/A
2N/Avoid *
2N/Acalloc(size_t nelem, size_t bytes)
2N/A{
2N/A    void * ptr;
2N/A    size_t size = nelem * bytes;
2N/A
2N/A    if (nelem != 0 && (size / nelem) != bytes) {
2N/A        errno = ENOMEM; /* overflow */
2N/A        return (NULL);
2N/A    }
2N/A    ptr = malloc(size);
2N/A    if (ptr == NULL)
2N/A        return (NULL);
2N/A    (void) memset(ptr, 0, size);
2N/A
2N/A    return (ptr);
2N/A}
2N/A
2N/Avoid
2N/Afree(void * ptr)
2N/A{
2N/A    cache_t *cacheptr;
2N/A    caddr_t mem;
2N/A    int32_t i;
2N/A    volatile ulong_t *freemask;
2N/A    uintptr_t offset;
2N/A    ulong_t page_offset;
2N/A    int32_t which_bit;
2N/A
2N/A    if (ptr <= (void *)0x00000001)
2N/A        return;
2N/A
2N/A    mem = (caddr_t)ptr - OVERHEAD;
2N/A
2N/A    if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
2N/A        mem -= OVERHEAD;
2N/A        ptr = (void *)*(uintptr_t *)mem;
2N/A        mem = (caddr_t)ptr - OVERHEAD;
2N/A    } else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
2N/A        ptr = (void *) mem;
2N/A        mem -= OVERHEAD;
2N/A    }
2N/A
2N/A    if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
2N/A        oversize_t *big;
2N/A
2N/A        big = (oversize_t *)(mem - OVSZ_SIZE);
2N/A        (void) mutex_lock(&oversize_lock);
2N/A
2N/A        if (remove_hash(big) == -1) {
2N/A            if (!(debugopt & MTDOUBLEFREE))
2N/A                abort();
2N/A            (void) mutex_unlock(&oversize_lock);
2N/A            return;
2N/A        }
2N/A
2N/A        /*
2N/A         * big is the oversize_t
2N/A         * big->addr is what was returned from malloc
2N/A         * big->size is length
2N/A         * so if addr is page aligned
2N/A         * madvise(big->addr, big->size, MADV_FREE)
2N/A         * even if addr is not page aligned, then we can still madvise
2N/A         * a subset of the range
2N/A         * recall that a oversize allocation is always > a page
2N/A         * page_offset=addr%pagesize
2N/A         * if (page_offset != 0 ) page_offset=pagesize-page_offset
2N/A         * madvise(addr+offset, big->size - offset, MADV_FREE);
2N/A         */
2N/A        if (do_realfree && (big->size >= do_realfree)) {
2N/A            page_offset = ((ulong_t)big->addr)%MTPAGESIZE;
2N/A            if (page_offset != 0)
2N/A                page_offset = MTPAGESIZE - page_offset;
2N/A            i = madvise(big->addr + page_offset, big->size -
2N/A                page_offset, MADV_FREE);
2N/A        }
2N/A
2N/A        if (debugopt & MTDEBUGPATTERN)
2N/A            copy_pattern(FREEPATTERN, ptr, big->size);
2N/A        add_oversize(big);
2N/A        (void) mutex_unlock(&oversize_lock);
2N/A        return;
2N/A    }
2N/A
2N/A    cacheptr = (cache_t *)*(uintptr_t *)mem;
2N/A    /*
2N/A     * This is the distance measured in bits into the arena.
2N/A     * The value of offset is in bytes but there is a 1-1 correlation
2N/A     * between distance into the arena and distance into the
2N/A     * freelist bitmask.
2N/A     */
2N/A    offset = mem - cacheptr->mt_arena;
2N/A
2N/A    /*
2N/A     * i is total number of bits to offset into freelist bitmask.
2N/A     */
2N/A
2N/A    i = offset / cacheptr->mt_size;
2N/A
2N/A    freemask = &cacheptr->mt_freemask[i / FREEMASK_BITS];
2N/A
2N/A    /*
2N/A     * which_bit is the bit offset into the byte in the freelist.
2N/A     * if our freelist bitmask looks like 0xf3 and we are freeing
2N/A     * block 5 (ie: the 6th block) our mask will be 0xf7 after
2N/A     * the free. Things go left to right that's why the mask is 0x80
2N/A     * and not 0x01.
2N/A     */
2N/A    which_bit = i % FREEMASK_BITS;
2N/A    /* we copy the pattern before we set the bit to free */
2N/A    if (debugopt & MTDEBUGPATTERN)
2N/A        copy_pattern(FREEPATTERN, ptr, cacheptr->mt_size - OVERHEAD);
2N/A
2N/A    /* madvise if requested */
2N/A    if (do_realfree && (cacheptr->mt_size >= do_realfree)) {
2N/A        page_offset = ((ulong_t)mem)%MTPAGESIZE;
2N/A        if (page_offset != 0)
2N/A            page_offset = MTPAGESIZE - page_offset;
2N/A        i = madvise(mem + page_offset, cacheptr->mt_size -
2N/A            page_offset, MADV_FREE);
2N/A    }
2N/A
2N/A    if (atomic_set_long_excl(freemask, which_bit) != 0) {
2N/A        if (!(debugopt & MTDOUBLEFREE))
2N/A            abort();
2N/A    } else {
2N/A        (void) atomic_inc_uint(cacheptr->mt_nfreeptr);
2N/A    }
2N/A}
2N/A
2N/Avoid *
2N/Amemalign(size_t alignment, size_t size)
2N/A{
2N/A    size_t alloc_size;
2N/A    uintptr_t offset;
2N/A    void *alloc_buf;
2N/A    void *ret_buf;
2N/A
2N/A    if (size == 0 || alignment == 0 ||
2N/A        misaligned(alignment) ||
2N/A        (alignment & (alignment - 1)) != 0) {
2N/A        errno = EINVAL;
2N/A        return (NULL);
2N/A    }
2N/A
2N/A    /* <= MTMALLOC_MIN_ALIGN, malloc can provide directly */
2N/A    if (alignment <= MTMALLOC_MIN_ALIGN)
2N/A        return (malloc(size));
2N/A
2N/A    alloc_size = size + alignment - MTMALLOC_MIN_ALIGN;
2N/A
2N/A    if (alloc_size < size) { /* overflow */
2N/A        errno = ENOMEM;
2N/A        return (NULL);
2N/A    }
2N/A
2N/A    alloc_buf = malloc(alloc_size);
2N/A
2N/A    if (alloc_buf == NULL)
2N/A        /* malloc sets errno */
2N/A        return (NULL);
2N/A
2N/A    /*
2N/A     * If alloc_size > MAX_CACHED, malloc() will have returned a multiple of
2N/A     * MTMALLOC_MIN_ALIGN, having rounded-up alloc_size if necessary. Since
2N/A     * we will use alloc_size to return the excess fragments to the free
2N/A     * list, we also round-up alloc_size if necessary.
2N/A     */
2N/A    if ((alloc_size > MAX_CACHED) &&
2N/A        (alloc_size & (MTMALLOC_MIN_ALIGN - 1)))
2N/A        alloc_size = ALIGN(alloc_size, MTMALLOC_MIN_ALIGN);
2N/A
2N/A    if ((offset = (uintptr_t)alloc_buf & (alignment - 1)) == 0) {
2N/A        /* aligned correctly */
2N/A
2N/A        size_t frag_size = alloc_size -
2N/A            (size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);
2N/A
2N/A        /*
2N/A         * If the leftover piece of the memory > MAX_CACHED,
2N/A         * split off the piece and return it back to the freelist.
2N/A         */
2N/A        if (IS_OVERSIZE(frag_size, alloc_size)) {
2N/A            oversize_t *orig, *tail;
2N/A            uintptr_t taddr;
2N/A            size_t data_size;
2N/A            taddr = ALIGN((uintptr_t)alloc_buf + size,
2N/A                MTMALLOC_MIN_ALIGN);
2N/A            data_size = taddr - (uintptr_t)alloc_buf;
2N/A            orig = (oversize_t *)((uintptr_t)alloc_buf -
2N/A                OVSZ_HEADER_SIZE);
2N/A            frag_size = orig->size - data_size -
2N/A                OVSZ_HEADER_SIZE;
2N/A            orig->size = data_size;
2N/A            tail = oversize_header_alloc(taddr, frag_size);
2N/A            free_oversize(tail, 0);
2N/A        }
2N/A        ret_buf = alloc_buf;
2N/A    } else {
2N/A        uchar_t oversize_bits = 0;
2N/A        size_t  head_sz, data_sz, tail_sz;
2N/A        uintptr_t ret_addr, taddr, shift, tshift;
2N/A        oversize_t *orig, *tail, *big;
2N/A        size_t tsize;
2N/A
2N/A        /* needs to be aligned */
2N/A        shift = alignment - offset;
2N/A
2N/A        assert(shift >= MTMALLOC_MIN_ALIGN);
2N/A
2N/A        ret_addr = ((uintptr_t)alloc_buf + shift);
2N/A        ret_buf = (void *)ret_addr;
2N/A
2N/A        if (alloc_size <= MAX_CACHED) {
2N/A            MEMALIGN_HEADER_ALLOC(ret_addr, shift, alloc_buf);
2N/A            return (ret_buf);
2N/A        }
2N/A
2N/A        /*
2N/A         * Only check for the fragments when the memory is allocted
2N/A         * from oversize_list.  Split off a fragment and return it
2N/A         * to the oversize freelist when it's > MAX_CACHED.
2N/A         */
2N/A
2N/A        head_sz = shift - MAX(MEMALIGN_HEADER_SIZE, OVSZ_HEADER_SIZE);
2N/A
2N/A        tail_sz = alloc_size -
2N/A            (shift + size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);
2N/A
2N/A        oversize_bits |= IS_OVERSIZE(head_sz, alloc_size) |
2N/A            IS_OVERSIZE(size, alloc_size) << DATA_SHIFT |
2N/A            IS_OVERSIZE(tail_sz, alloc_size) << TAIL_SHIFT;
2N/A
2N/A        switch (oversize_bits) {
2N/A            case NONE_OVERSIZE:
2N/A            case DATA_OVERSIZE:
2N/A                MEMALIGN_HEADER_ALLOC(ret_addr, shift,
2N/A                    alloc_buf);
2N/A                break;
2N/A            case HEAD_OVERSIZE:
2N/A                /*
2N/A                 * If we can extend data > MAX_CACHED and have
2N/A                 * head still > MAX_CACHED, we split head-end
2N/A                 * as the case of head-end and data oversized,
2N/A                 * otherwise just create memalign header.
2N/A                 */
2N/A                tsize = (shift + size) - (MAX_CACHED + 8 +
2N/A                    MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);
2N/A
2N/A                if (!IS_OVERSIZE(tsize, alloc_size)) {
2N/A                    MEMALIGN_HEADER_ALLOC(ret_addr, shift,
2N/A                        alloc_buf);
2N/A                    break;
2N/A                } else {
2N/A                    tsize += OVSZ_HEADER_SIZE;
2N/A                    taddr = ALIGN((uintptr_t)alloc_buf +
2N/A                        tsize, MTMALLOC_MIN_ALIGN);
2N/A                    tshift = ret_addr - taddr;
2N/A                    MEMALIGN_HEADER_ALLOC(ret_addr, tshift,
2N/A                        taddr);
2N/A                    ret_addr = taddr;
2N/A                    shift = ret_addr - (uintptr_t)alloc_buf;
2N/A                }
2N/A                /* FALLTHROUGH */
2N/A            case HEAD_AND_DATA_OVERSIZE:
2N/A                /*
2N/A                 * Split off the head fragment and
2N/A                 * return it back to oversize freelist.
2N/A                 * Create oversize header for the piece
2N/A                 * of (data + tail fragment).
2N/A                 */
2N/A                orig = (oversize_t *)((uintptr_t)alloc_buf -
2N/A                    OVSZ_HEADER_SIZE);
2N/A                big = oversize_header_alloc(ret_addr -
2N/A                    OVSZ_HEADER_SIZE, (orig->size - shift));
2N/A                (void) mutex_lock(&oversize_lock);
2N/A                insert_hash(big);
2N/A                (void) mutex_unlock(&oversize_lock);
2N/A                orig->size = shift - OVSZ_HEADER_SIZE;
2N/A
2N/A                /* free up the head fragment */
2N/A                free_oversize(orig, 1);
2N/A                break;
2N/A            case TAIL_OVERSIZE:
2N/A                /*
2N/A                 * If we can extend data > MAX_CACHED and have
2N/A                 * tail-end still > MAX_CACHED, we split tail
2N/A                 * end, otherwise just create memalign header.
2N/A                 */
2N/A                orig = (oversize_t *)((uintptr_t)alloc_buf -
2N/A                    OVSZ_HEADER_SIZE);
2N/A                tsize =  orig->size - (MAX_CACHED + 8 +
2N/A                    shift + OVSZ_HEADER_SIZE +
2N/A                    MTMALLOC_MIN_ALIGN);
2N/A                if (!IS_OVERSIZE(tsize, alloc_size)) {
2N/A                    MEMALIGN_HEADER_ALLOC(ret_addr, shift,
2N/A                        alloc_buf);
2N/A                    break;
2N/A                } else {
2N/A                    size = MAX_CACHED + 8;
2N/A                }
2N/A                /* FALLTHROUGH */
2N/A            case DATA_AND_TAIL_OVERSIZE:
2N/A                /*
2N/A                 * Split off the tail fragment and
2N/A                 * return it back to oversize freelist.
2N/A                 * Create memalign header and adjust
2N/A                 * the size for the piece of
2N/A                 * (head fragment + data).
2N/A                 */
2N/A                taddr = ALIGN(ret_addr + size,
2N/A                    MTMALLOC_MIN_ALIGN);
2N/A                data_sz = (size_t)(taddr -
2N/A                    (uintptr_t)alloc_buf);
2N/A                orig = (oversize_t *)((uintptr_t)alloc_buf -
2N/A                    OVSZ_HEADER_SIZE);
2N/A                tsize = orig->size - data_sz;
2N/A                orig->size = data_sz;
2N/A                MEMALIGN_HEADER_ALLOC(ret_buf, shift,
2N/A                    alloc_buf);
2N/A                tsize -= OVSZ_HEADER_SIZE;
2N/A                tail = oversize_header_alloc(taddr,  tsize);
2N/A                free_oversize(tail, 0);
2N/A                break;
2N/A            case HEAD_AND_TAIL_OVERSIZE:
2N/A                /*
2N/A                 * Split off the head fragment.
2N/A                 * We try to free up tail-end when we can
2N/A                 * extend data size to (MAX_CACHED + 8)
2N/A                 * and remain tail-end oversized.
2N/A                 * The bottom line is all split pieces
2N/A                 * should be oversize in size.
2N/A                 */
2N/A                orig = (oversize_t *)((uintptr_t)alloc_buf -
2N/A                    OVSZ_HEADER_SIZE);
2N/A                tsize =  orig->size - (MAX_CACHED + 8 +
2N/A                    OVSZ_HEADER_SIZE + shift +
2N/A                    MTMALLOC_MIN_ALIGN);
2N/A
2N/A                if (!IS_OVERSIZE(tsize, alloc_size)) {
2N/A                    /*
2N/A                     * If the chunk is not big enough
2N/A                     * to make both data and tail oversize
2N/A                     * we just keep them as one piece.
2N/A                     */
2N/A                    big = oversize_header_alloc(ret_addr -
2N/A                        OVSZ_HEADER_SIZE,
2N/A                        orig->size - shift);
2N/A                    (void) mutex_lock(&oversize_lock);
2N/A                    insert_hash(big);
2N/A                    (void) mutex_unlock(&oversize_lock);
2N/A                    orig->size = shift - OVSZ_HEADER_SIZE;
2N/A                    free_oversize(orig, 1);
2N/A                    break;
2N/A                } else {
2N/A                    /*
2N/A                     * extend data size > MAX_CACHED
2N/A                     * and handle it as head, data, tail
2N/A                     * are all oversized.
2N/A                     */
2N/A                    size = MAX_CACHED + 8;
2N/A                }
2N/A                /* FALLTHROUGH */
2N/A            case ALL_OVERSIZE:
2N/A                /*
2N/A                 * split off the head and tail fragments,
2N/A                 * return them back to the oversize freelist.
2N/A                 * Alloc oversize header for data seg.
2N/A                 */
2N/A                orig = (oversize_t *)((uintptr_t)alloc_buf -
2N/A                    OVSZ_HEADER_SIZE);
2N/A                tsize = orig->size;
2N/A                orig->size = shift - OVSZ_HEADER_SIZE;
2N/A                free_oversize(orig, 1);
2N/A
2N/A                taddr = ALIGN(ret_addr + size,
2N/A                    MTMALLOC_MIN_ALIGN);
2N/A                data_sz = taddr - ret_addr;
2N/A                assert(tsize > (shift + data_sz +
2N/A                    OVSZ_HEADER_SIZE));
2N/A                tail_sz = tsize -
2N/A                    (shift + data_sz + OVSZ_HEADER_SIZE);
2N/A
2N/A                /* create oversize header for data seg */
2N/A                big = oversize_header_alloc(ret_addr -
2N/A                    OVSZ_HEADER_SIZE, data_sz);
2N/A                (void) mutex_lock(&oversize_lock);
2N/A                insert_hash(big);
2N/A                (void) mutex_unlock(&oversize_lock);
2N/A
2N/A                /* create oversize header for tail fragment */
2N/A                tail = oversize_header_alloc(taddr, tail_sz);
2N/A                free_oversize(tail, 0);
2N/A                break;
2N/A            default:
2N/A                /* should not reach here */
2N/A                assert(0);
2N/A        }
2N/A    }
2N/A    return (ret_buf);
2N/A}
2N/A
2N/A
2N/Avoid *
2N/Avalloc(size_t size)
2N/A{
2N/A    static unsigned pagesize;
2N/A
2N/A    if (size == 0)
2N/A        return (NULL);
2N/A
2N/A    if (!pagesize)
2N/A        pagesize = sysconf(_SC_PAGESIZE);
2N/A
2N/A    return (memalign(pagesize, size));
2N/A}
2N/A
2N/Avoid
2N/Amallocctl(int cmd, long value)
2N/A{
2N/A    static mutex_t mallocctl_lock = DEFAULTMUTEX;
2N/A    (void) mutex_lock(&mallocctl_lock);
2N/A    switch (cmd) {
2N/A
2N/A    case MTDEBUGPATTERN:
2N/A        /*
2N/A         * Reinitialize free blocks in case malloc() is called prior
2N/A         * to mallocctl().
2N/A         */
2N/A        /* reinit is now an atomic counter */
2N/A        if (value && !(debugopt & cmd)) {
2N/A            (void) atomic_inc_uint(&reinit);
2N/A            debugopt |= cmd;
2N/A            reinit_cpu_list();
2N/A            (void) atomic_inc_uint(&reinit);
2N/A        }
2N/A        /*FALLTHRU*/
2N/A    case MTDOUBLEFREE:
2N/A    case MTINITBUFFER:
2N/A        if (value)
2N/A            debugopt |= cmd;
2N/A        else
2N/A            debugopt &= ~cmd;
2N/A        break;
2N/A
2N/A    case MTEXCLUSIVE:
2N/A        (void) atomic_swap_uint(&lockfree_threshold,
2N/A            lockfree_buckets);
2N/A        break;
2N/A
2N/A    case MTCHUNKSIZE:
2N/A        if (value >= MINSIZE && value <= MAXSIZE)
2N/A            requestsize = value;
2N/A        break;
2N/A
2N/A    case MTREALFREE:
2N/A        do_realfree = value;
2N/A        if (do_realfree < 2 && do_realfree != 0)
2N/A            do_realfree = 2;
2N/A        do_realfree = do_realfree*MTPAGESIZE;
2N/A        break;
2N/A
2N/A    default:
2N/A        break;
2N/A    }
2N/A    (void) mutex_unlock(&mallocctl_lock);
2N/A}
2N/A
2N/A/*
2N/A * Initialization function, called from the init section of the library.
2N/A * No locking is required here because we are single-threaded during
2N/A * library initialization.
2N/A */
2N/Astatic uint_t
2N/Afallback_curcpu(void)
2N/A{
2N/A    return (0);
2N/A}
2N/A
2N/A/*
2N/A * Returns non-zero on success, zero on failure.
2N/A *
2N/A * This carefully doesn't set cpu_list until initialization is finished.
2N/A */
2N/Astatic int
2N/Asetup_caches(void)
2N/A{
2N/A    static mutex_t init_lock = DEFAULTMUTEX;
2N/A
2N/A    uintptr_t oldbrk;
2N/A    uintptr_t newbrk;
2N/A
2N/A    size_t cache_space_needed;
2N/A    size_t padding;
2N/A
2N/A    curcpu_func new_curcpu;
2N/A    uint_t new_cpu_mask;
2N/A    percpu_t *new_cpu_list;
2N/A
2N/A    uint_t i, j;
2N/A    uintptr_t list_addr;
2N/A
2N/A    int do_MTEXCLUSIVE = 0;
2N/A    int change_MTCHUNKSIZE = 0;
2N/A    int change_MTDEBUGPATTERN = 0;
2N/A    int new_chunk;
2N/A    char *mtmalloc_options = NULL;
2N/A    char *fnd_equal, *fnd_comma, *prev, *last;
2N/A    char opt[MBUFSZ];
2N/A    char val[MBUFSZ];
2N/A
2N/A    (void) mutex_lock(&init_lock);
2N/A    if (cpu_list != NULL) {
2N/A        (void) mutex_unlock(&init_lock);
2N/A        return (1);         /* success -- already initialized */
2N/A    }
2N/A
2N/A    /* access MTMALLOC_OPTIONS from the environment */
2N/A    MTPAGESIZE = sysconf(_SC_PAGESIZE);
2N/A    mtmalloc_options = getenv("MTMALLOC_OPTIONS");
2N/A    if (mtmalloc_options != NULL) {
2N/A        /*
2N/A         * we have options, but we have to apply them in order
2N/A         * MTMAXCACHE goes first so we can setup the caches
2N/A         * after that we can accept MTEXCLUSIVE or MTCHUNKSIZE
2N/A         */
2N/A        last = &(mtmalloc_options[strlen(mtmalloc_options)]);
2N/A        prev = mtmalloc_options;
2N/A        if (strlen(mtmalloc_options) < MBUFSZ) { /* else bad variable */
2N/A            while (prev < last) {
2N/A                /* environment variable length OK */
2N/A                fnd_equal = index(prev, '=');
2N/A                if (fnd_equal == NULL)
2N/A                    break;
2N/A                fnd_comma = index(fnd_equal+1, ',');
2N/A                if (fnd_comma == NULL)
2N/A                    fnd_comma = last;
2N/A                (void) strncpy(opt, prev, fnd_equal - prev);
2N/A                opt[fnd_equal - prev] = '\0';
2N/A                (void) strncpy(val, fnd_equal + 1,
2N/A                    fnd_comma - fnd_equal -1);
2N/A                val[fnd_comma - fnd_equal -1 ] = '\0';
2N/A                if (strcmp(opt, "MTMAXCACHE") == 0) {
2N/A                    MAX_CACHED_SHIFT = atoi(val);
2N/A                } else if (strcmp(opt, "MTEXCLUSIVE") == 0) {
2N/A                    if (val[0] == 'Y' ||
2N/A                        val[0] == 'y') {
2N/A                        do_MTEXCLUSIVE = 1;
2N/A                    }
2N/A                } else if (strcmp(opt, "MTCHUNKSIZE") == 0) {
2N/A                    change_MTCHUNKSIZE = 1;
2N/A                    new_chunk = atoi(val);
2N/A                } else if (strcmp(opt, "MTREALFREE") == 0) {
2N/A                    do_realfree = atol(val);
2N/A                    if (do_realfree < 2 && do_realfree != 0)
2N/A                        do_realfree = 2;
2N/A                    do_realfree = do_realfree*MTPAGESIZE;
2N/A                } else if (strcmp(opt, "MTDEBUGPATTERN") == 0) {
2N/A                    if (val[0] == 'Y' || val[0] == 'y') {
2N/A                        change_MTDEBUGPATTERN = 1;
2N/A                    }
2N/A                } else if (strcmp(opt, "MTDOUBLEFREE") == 0) {
2N/A                    if (val[0] == 'Y' || val[0] == 'y') {
2N/A                        debugopt |= MTDOUBLEFREE;
2N/A                    }
2N/A                } else if (strcmp(opt, "MTINITBUFFER") == 0) {
2N/A                    if (val[0] == 'Y' || val[0] == 'y') {
2N/A                        debugopt |= MTINITBUFFER;
2N/A                    }
2N/A                }
2N/A                prev = fnd_comma+1;
2N/A            }
2N/A        }
2N/A    } /* end of decode MTMALLOC_OPTIONS */
2N/A
2N/A    if (MAX_CACHED_SHIFT < 16)
2N/A        MAX_CACHED_SHIFT = 16;
2N/A    if (MAX_CACHED_SHIFT > 21)
2N/A        MAX_CACHED_SHIFT = 21;
2N/A    switch (MAX_CACHED_SHIFT) {
2N/A        case 16:
2N/A            MINSIZE = 9;
2N/A            break;
2N/A        case 17:
2N/A            MINSIZE = 17;
2N/A            break;
2N/A        case 18:
2N/A            MINSIZE = 41;
2N/A            break;
2N/A        case 19:
2N/A            MINSIZE = 81;
2N/A            break;
2N/A        case 20:
2N/A            MINSIZE = 137;
2N/A            break;
2N/A        case 21:
2N/A            MINSIZE = 265;
2N/A            break;
2N/A        default:
2N/A            MINSIZE = 9;
2N/A    }
2N/A#ifdef _LP64
2N/A    if (MINSIZE < 64)
2N/A        MINSIZE = 64;
2N/A#endif
2N/A    requestsize = MINSIZE;
2N/A    MAX_CACHED = (1 << MAX_CACHED_SHIFT);
2N/A    NUM_CACHES = (MAX_CACHED_SHIFT - MIN_CACHED_SHIFT + 1);
2N/A    CACHELIST_SIZE = ALIGN(NUM_CACHES * sizeof (cache_head_t),
2N/A        CACHE_COHERENCY_UNIT);
2N/A    /* end of MTMALLOC_OPTIONS section */
2N/A
2N/A    /*
2N/A     * Get a decent "current cpu identifier", to be used to reduce
2N/A     * contention.  Eventually, this should be replaced by an interface
2N/A     * to get the actual CPU sequence number in libthread/liblwp.
2N/A     */
2N/A
2N/A    new_curcpu = get_curcpu_func();
2N/A    if (new_curcpu == NULL) {
2N/A        new_curcpu = fallback_curcpu;
2N/A        ncpus = 1;
2N/A    } else {
2N/A        if ((ncpus = 2 * sysconf(_SC_NPROCESSORS_CONF)) <= 0)
2N/A            ncpus = 4; /* decent default value */
2N/A    }
2N/A    assert(ncpus > 0);
2N/A
2N/A    /* round ncpus up to a power of 2 */
2N/A    while (ncpus & (ncpus - 1))
2N/A        ncpus++;
2N/A
2N/A    new_cpu_mask = ncpus - 1;   /* create the cpu mask */
2N/A
2N/A    /* allocate additional percpu structures for nocache locks */
2N/A    lockfree_buckets = ncpus;
2N/A    ncpus += lockfree_buckets;
2N/A
2N/A    /*
2N/A     * We now do some magic with the brk.  What we want to get in the
2N/A     * end is a bunch of well-aligned stuff in a big initial allocation.
2N/A     * Along the way, we do sanity checks to make sure no one else has
2N/A     * touched the brk (which shouldn't happen, but it's always good to
2N/A     * check)
2N/A     *
2N/A     * First, make sure sbrk is sane, and store the current brk in oldbrk.
2N/A     */
2N/A    oldbrk = (uintptr_t)sbrk(0);
2N/A    if ((void *)oldbrk == (void *)-1) {
2N/A        (void) mutex_unlock(&init_lock);
2N/A        return (0); /* sbrk is broken -- we're doomed. */
2N/A    }
2N/A
2N/A    /*
2N/A     * Now, align the brk to a multiple of CACHE_COHERENCY_UNIT, so that
2N/A     * the percpu structures and cache lists will be properly aligned.
2N/A     *
2N/A     *   2.  All hunks will be page-aligned, assuming HUNKSIZE >= PAGESIZE,
2N/A     *  so they can be paged out individually.
2N/A     */
2N/A    newbrk = ALIGN(oldbrk, CACHE_COHERENCY_UNIT);
2N/A    if (newbrk != oldbrk && (uintptr_t)sbrk(newbrk - oldbrk) != oldbrk) {
2N/A        (void) mutex_unlock(&init_lock);
2N/A        return (0); /* someone else sbrked */
2N/A    }
2N/A
2N/A    /*
2N/A     * For each cpu, there is one percpu_t and a list of caches
2N/A     */
2N/A    cache_space_needed = ncpus * (sizeof (percpu_t) + CACHELIST_SIZE);
2N/A
2N/A    new_cpu_list = (percpu_t *)sbrk(cache_space_needed);
2N/A
2N/A    if (new_cpu_list == (percpu_t *)-1 ||
2N/A        (uintptr_t)new_cpu_list != newbrk) {
2N/A        (void) mutex_unlock(&init_lock);
2N/A        return (0); /* someone else sbrked */
2N/A    }
2N/A
2N/A    /*
2N/A     * Finally, align the brk to HUNKSIZE so that all hunks are
2N/A     * page-aligned, to avoid edge-effects.
2N/A     */
2N/A
2N/A    newbrk = (uintptr_t)new_cpu_list + cache_space_needed;
2N/A
2N/A    padding = ALIGN(newbrk, HUNKSIZE) - newbrk;
2N/A
2N/A    if (padding > 0 && (uintptr_t)sbrk(padding) != newbrk) {
2N/A        (void) mutex_unlock(&init_lock);
2N/A        return (0); /* someone else sbrked */
2N/A    }
2N/A
2N/A    list_addr = ((uintptr_t)new_cpu_list + (sizeof (percpu_t) * ncpus));
2N/A
2N/A    /* initialize the percpu list */
2N/A    for (i = 0; i < ncpus; i++) {
2N/A        new_cpu_list[i].mt_caches = (cache_head_t *)list_addr;
2N/A        for (j = 0; j < NUM_CACHES; j++) {
2N/A        new_cpu_list[i].mt_caches[j].mt_cachespaceblockhint = NULL;
2N/A        new_cpu_list[i].mt_caches[j].mt_cachespaceblock = NULL;
2N/A        new_cpu_list[i].mt_caches[j].mt_hint = NULL;
2N/A        }
2N/A
2N/A        (void) mutex_init(&new_cpu_list[i].mt_parent_lock,
2N/A            USYNC_THREAD, NULL);
2N/A
2N/A        /* get the correct cache list alignment */
2N/A        list_addr += CACHELIST_SIZE;
2N/A    }
2N/A
2N/A    /*
2N/A     * now install the global variables, leaving cpu_list for last, so that
2N/A     * there aren't any race conditions.
2N/A     */
2N/A    curcpu = new_curcpu;
2N/A    cpu_mask = new_cpu_mask;
2N/A    cpu_list = new_cpu_list;
2N/A    exclusive_list = &new_cpu_list[new_cpu_mask + 1];
2N/A
2N/A    /* the following code support the use ot MTMALLOC_OPTIONS */
2N/A    if (do_MTEXCLUSIVE) {
2N/A        lockfree_threshold = lockfree_buckets;
2N/A    }
2N/A    if (change_MTCHUNKSIZE) {
2N/A        if (new_chunk >= MINSIZE && new_chunk <= MAXSIZE)
2N/A            requestsize = new_chunk;
2N/A    }
2N/A    if (change_MTDEBUGPATTERN) {
2N/A        /* reinit is now an atomic counter */
2N/A        (void) atomic_inc_uint(&reinit);
2N/A        debugopt |= MTDEBUGPATTERN;
2N/A        reinit_cpu_list();
2N/A        (void) atomic_inc_uint(&reinit);
2N/A    }
2N/A    /* end of MTMALLOC_OPTIONS section */
2N/A
2N/A    (void) mutex_unlock(&init_lock);
2N/A
2N/A    return (1);
2N/A}
2N/A
2N/Astatic void
2N/Acreate_cache(cache_t *cp, size_t size, uint_t chunksize, volatile uint_t *cs)
2N/A{
2N/A    volatile ulong_t *freemask;
2N/A    const size_t span = chunksize * HUNKSIZE;
2N/A    const size_t nfree_max =
2N/A        (span - sizeof (cache_t) - sizeof (ulong_t)) / size;
2N/A    const size_t nmasks =
2N/A        4 * ALIGN(nfree_max, 4 * FREEMASK_BITS) / (4 * FREEMASK_BITS);
2N/A    uint_t nfree;
2N/A
2N/A    /*
2N/A     * We've got span bytes starting at cp.  The order is:
2N/A     *   cache_t
2N/A     *   ulong_t freemask[nmasks];
2N/A     *   caddr_t arena;     the remaining storage
2N/A     */
2N/A    cp->mt_size = size;
2N/A    cp->mt_hunks = chunksize;
2N/A    cp->mt_freemask = (ulong_t *)(cp + 1);
2N/A    cp->mt_efreemask = &cp->mt_freemask[nmasks];
2N/A    cp->mt_arena = (caddr_t)ALIGN((uintptr_t)cp->mt_efreemask,
2N/A        CACHE_COHERENCY_UNIT);
2N/A    cp->mt_length = span - (cp->mt_arena - (caddr_t)cp);
2N/A    cp->mt_nfreeptr = cs;
2N/A    *(cp->mt_nfreeptr) = cp->mt_length / size;
2N/A    cp->mt_lastoffset = 0;
2N/A
2N/A    /* Now that that's all set up, set up the freemask */
2N/A    freemask = cp->mt_freemask;
2N/A    nfree = *(cp->mt_nfreeptr);
2N/A    while (nfree >= FREEMASK_BITS) {
2N/A        *freemask++ = ~(ulong_t)0;
2N/A        nfree -= FREEMASK_BITS;
2N/A    }
2N/A    if (nfree > 0) {
2N/A        *freemask++ = ((ulong_t)1 << nfree) - 1;
2N/A    }
2N/A    assert(freemask <= cp->mt_efreemask);
2N/A
2N/A    if (debugopt & MTDEBUGPATTERN) {
2N/A        copy_pattern(FREEPATTERN,
2N/A            cp->mt_arena, cp->mt_size * *(cp->mt_nfreeptr));
2N/A    }
2N/A}
2N/A
2N/Astatic void
2N/Areinit_cpu_list(void)
2N/A{
2N/A    oversize_t *wp = oversize_list.next_bysize;
2N/A    percpu_t *cpuptr;
2N/A    cache_t *thiscache;
2N/A    cache_head_t *cachehead;
2N/A    cachespaceblock_t *thisblock;
2N/A    int i;
2N/A
2N/A    if (wp == NULL || cpu_list == NULL) {
2N/A        reinit = 0;
2N/A        return;
2N/A    }
2N/A
2N/A    /* Reinitialize free oversize blocks. */
2N/A    (void) mutex_lock(&oversize_lock);
2N/A    if (debugopt & MTDEBUGPATTERN)
2N/A        for (; wp != &oversize_list; wp = wp->next_bysize)
2N/A            copy_pattern(FREEPATTERN, wp->addr, wp->size);
2N/A    (void) mutex_unlock(&oversize_lock);
2N/A
2N/A    /* Reinitialize free blocks. */
2N/A    /* for each cpuptr */
2N/A    for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
2N/A        (void) mutex_lock(&cpuptr->mt_parent_lock);
2N/A        /* for each of the  NUM_CACHES power of two based caches */
2N/A        for (cachehead = &cpuptr->mt_caches[0]; cachehead <
2N/A            &cpuptr->mt_caches[NUM_CACHES]; cachehead++) {
2N/A
2N/A        /* while we have arrays of caches */
2N/A        thisblock = cachehead->mt_cachespaceblock;
2N/A        while (thisblock != NULL) {
2N/A            for (i = 0; i < CACHESPACES; i++) {
2N/A            if (thisblock->mt_nfree[i] != 0) {
2N/A                thiscache = thisblock->mt_cache[i];
2N/A                reinit_cache(thiscache);
2N/A            }
2N/A            }
2N/A            thisblock = thisblock->mt_nextblock;
2N/A        }
2N/A        }
2N/A        (void) mutex_unlock(&cpuptr->mt_parent_lock);
2N/A    }
2N/A}
2N/A
2N/Astatic void
2N/Areinit_cache(cache_t *thiscache)
2N/A{
2N/A    const size_t size = thiscache->mt_size;
2N/A    volatile ulong_t *sfreemask = thiscache->mt_freemask;
2N/A    volatile ulong_t *efreemask = thiscache->mt_efreemask;
2N/A    volatile ulong_t *freemask;
2N/A    ulong_t mask;
2N/A
2N/A    for (freemask = sfreemask; freemask < efreemask; freemask++) {
2N/A        caddr_t buf = thiscache->mt_arena +
2N/A            (freemask - sfreemask) * FREEMASK_BITS * size;
2N/A
2N/A        for (mask = *freemask; mask != 0; mask >>= 1) {
2N/A            if (mask & 0x1) {
2N/A                copy_pattern(FREEPATTERN,
2N/A                    buf + OVERHEAD, size - OVERHEAD);
2N/A            }
2N/A            buf += size;
2N/A        }
2N/A    }
2N/A
2N/A}
2N/A
2N/A/*  locks_used tells us whether to use per bucket locks */
2N/Astatic void *
2N/Amalloc_internal(size_t size, percpu_t *cpuptr, uint_t locks_used)
2N/A{
2N/A    cache_head_t *chead;
2N/A    cache_t *thiscache;
2N/A    cachespaceblock_t *curcspaceblk;
2N/A    cachespaceblock_t *hintcspaceblock;
2N/A    uint_t cacheindex;
2N/A    int32_t i, n, logsz, bucket;
2N/A    int dbg_pattern = 0;
2N/A    volatile ulong_t *freemask, *efreemask, *sfreemask;
2N/A    ulong_t mask;
2N/A    size_t offset;
2N/A    uint32_t old_reinit;
2N/A    caddr_t ret;
2N/A    long thisrequest;
2N/A    int32_t buffer_size;
2N/A
2N/A    logsz = MIN_CACHED_SHIFT;
2N/A
2N/A    while (size > (1 << logsz))
2N/A        logsz++;
2N/A
2N/A    bucket = logsz - MIN_CACHED_SHIFT;
2N/A
2N/A    if (locks_used) {
2N/A        (void) mutex_lock(&cpuptr->mt_parent_lock);
2N/A    }
2N/A
2N/A    /*
2N/A     * Find a cache of the appropriate size with free buffers.
2N/A     *
2N/A     * We don't need to lock each cache as we check their mt_nfree
2N/A     * count,
2N/A     * since:
2N/A     *  1. We are only looking for caches with mt_nfree > 0.  If a
2N/A     *   free happens during our search, it will increment mt_nfree,
2N/A     *   which will not effect the test.
2N/A     *  2. Allocations can decrement mt_nfree, but they can't happen
2N/A     *     as long as we hold mt_parent_lock.
2N/A     */
2N/A
2N/A    chead = &cpuptr->mt_caches[bucket];
2N/A
2N/A    /*
2N/A     * Search through the list, starting at the mt_hint
2N/A     * We have a linked list of blocks, each block with
2N/A     * a default 512 caches
2N/A     */
2N/A    if (chead->mt_cachespaceblockhint == NULL) {
2N/A        /* First time this has been used */
2N/A        /* allocating space for 512(default) caches */
2N/A        chead->mt_cachespaceblock =
2N/A            (cachespaceblock_t *)morecore(sizeof (cachespaceblock_t));
2N/A        if (chead->mt_cachespaceblock == (cachespaceblock_t *)-1) {
2N/A            if (locks_used)
2N/A                (void) mutex_unlock(&cpuptr->mt_parent_lock);
2N/A            errno = ENOMEM;
2N/A            return (NULL);
2N/A        }
2N/A        (void) memset(chead->mt_cachespaceblock, 0,
2N/A            sizeof (cachespaceblock_t));
2N/A        /* Set up hint, the address of the first mt_cachespaceblock */
2N/A        chead->mt_cachespaceblockhint =
2N/A            chead->mt_cachespaceblock;
2N/A    }
2N/A
2N/A    /*
2N/A     * at this point we always have a hint
2N/A     * Find a cache with free space
2N/A     */
2N/A    hintcspaceblock = chead->mt_cachespaceblockhint;
2N/A    cacheindex = chead->mt_hint; /* 0 first time through */
2N/A    thiscache = hintcspaceblock->mt_cache[cacheindex];
2N/A
2N/A    if (hintcspaceblock->mt_nfree[cacheindex] == 0) {
2N/A        /*
2N/A         * No space in most recently used cache,
2N/A         * need to locate a new cache
2N/A         */
2N/A        cacheindex = 0xffffffff;
2N/A        /* starting at the beginning */
2N/A        for (i = 0; i < CACHESPACES; i++) {
2N/A            if (hintcspaceblock->mt_nfree[i] != 0) {
2N/A                /* Found cache */
2N/A                cacheindex = i;
2N/A                /* Record hint for next time */
2N/A                chead->mt_hint = cacheindex;
2N/A                thiscache = hintcspaceblock->mt_cache[i];
2N/A                break;
2N/A            }
2N/A        }
2N/A        if (cacheindex == 0xffffffff) {
2N/A            /*
2N/A             * Still unable to locate a cache with space
2N/A             * so need to scan
2N/A             * cache space blocks
2N/A             *
2N/A             * Start searching from the block after the
2N/A             * hint block
2N/A             */
2N/A            curcspaceblk =
2N/A                hintcspaceblock->mt_nextblock;
2N/A            while ((curcspaceblk != NULL) &&
2N/A                (cacheindex == 0xffffffff)) {
2N/A                /*
2N/A                 * The following code assumes that
2N/A                 * curcspaceblk->mt_nfree[0]
2N/A                 * is eight byte aligned
2N/A                 */
2N/A                u_longlong_t *llnfree = (u_longlong_t *)
2N/A                    &curcspaceblk->mt_nfree[0];
2N/A                for (i = 0; i < CACHESPACES; i += 2) {
2N/A                    if (*llnfree != 0) {
2N/A                        /*
2N/A                         * if (curcspaceblk->
2N/A                         * mt_nfree[i] != 0)
2N/A                         * Check whether it is the
2N/A                         * first in the pair
2N/A                         */
2N/A                        if (curcspaceblk->
2N/A                            mt_nfree[i] == 0) {
2N/A                            i++;
2N/A                        }
2N/A                        /* Found cache */
2N/A                        cacheindex = i;
2N/A                        /* Record hint for next time */
2N/A                        chead->mt_hint = cacheindex;
2N/A                        hintcspaceblock =
2N/A                            curcspaceblk;
2N/A                        chead->mt_cachespaceblockhint =
2N/A                            curcspaceblk;
2N/A                        thiscache = hintcspaceblock
2N/A                            ->mt_cache[i];
2N/A                        break;
2N/A                    }
2N/A                    llnfree++; /* Next pair of elements */
2N/A                }
2N/A                /* Scan next block */
2N/A                curcspaceblk =
2N/A                    curcspaceblk->mt_nextblock;
2N/A            }
2N/A        }
2N/A        if (cacheindex == 0xffffffff) {
2N/A            /*
2N/A             * Still unable to locate a cache with space so need to
2N/A             * scan all cache space blocks
2N/A             * Start scanning from top
2N/A             */
2N/A            curcspaceblk = chead->mt_cachespaceblock;
2N/A            /* Scan until we reach the last hint block */
2N/A            while ((curcspaceblk !=
2N/A                hintcspaceblock) && (cacheindex == 0xffffffff)) {
2N/A                /*
2N/A                 * The following code assumes that
2N/A                 * curcspaceblk->mt_nfree[0]
2N/A                 * is eight byte aligned
2N/A                 */
2N/A                u_longlong_t *llnfree = (u_longlong_t *)
2N/A                    &curcspaceblk->mt_nfree[0];
2N/A                for (i = 0; i < CACHESPACES; i += 2) {
2N/A                    if (*llnfree != 0) {
2N/A                        /*
2N/A                         * if (curcspaceblk->
2N/A                         * mt_nfree[i] != 0)
2N/A                         * Check whether it is the
2N/A                         * first in the pair
2N/A                         */
2N/A                        if (curcspaceblk->
2N/A                            mt_nfree[i] == 0) {
2N/A                            i++;
2N/A                        }
2N/A                        /* Found cache */
2N/A                        cacheindex = i;
2N/A                        /* Record hint for next time */
2N/A                        chead->mt_hint = cacheindex;
2N/A                        hintcspaceblock = curcspaceblk;
2N/A                        chead->mt_cachespaceblockhint
2N/A                            = curcspaceblk;
2N/A                        thiscache =
2N/A                            hintcspaceblock->
2N/A                            mt_cache[i];
2N/A                        break;
2N/A                    }
2N/A                    llnfree++; /* Next pair of elements */
2N/A                }
2N/A                /* Scan next block */
2N/A                curcspaceblk =
2N/A                    curcspaceblk->mt_nextblock;
2N/A            }
2N/A        }
2N/A        if (cacheindex == 0xffffffff) {
2N/A            /*
2N/A             * Unable to locate any free space need to
2N/A             * add a new cache
2N/A             */
2N/A            for (i = 0; i < CACHESPACES; i++) {
2N/A                if (chead->mt_cachespaceblock->mt_cache[i]
2N/A                    == 0) {
2N/A                    /* Found location to make cache */
2N/A                    cacheindex = i;
2N/A                    /* Set up hints */
2N/A                    chead->mt_cachespaceblockhint =
2N/A                        chead->mt_cachespaceblock;
2N/A                    hintcspaceblock =
2N/A                        chead->mt_cachespaceblock;
2N/A                    chead->mt_hint = i;
2N/A                    break;
2N/A                }
2N/A            }
2N/A            if (cacheindex == 0xffffffff) {
2N/A                /* Entire cache space block has been used up */
2N/A                /* Allocate new one */
2N/A                curcspaceblk = (cachespaceblock_t *)
2N/A                    morecore(sizeof (cachespaceblock_t));
2N/A                if (curcspaceblk ==
2N/A                    (cachespaceblock_t *)-1) {
2N/A                    if (locks_used)
2N/A                        (void) mutex_unlock(
2N/A                            &cpuptr->mt_parent_lock);
2N/A                    errno = ENOMEM;
2N/A                    return (NULL);
2N/A                }
2N/A                /* zero it out */
2N/A                (void) memset(curcspaceblk, 0,
2N/A                    sizeof (cachespaceblock_t));
2N/A                /* Insert at top of list */
2N/A                curcspaceblk->mt_nextblock =
2N/A                    chead->mt_cachespaceblock;
2N/A                chead->mt_cachespaceblock =
2N/A                    curcspaceblk;
2N/A                /* Set up hints */
2N/A                chead->mt_cachespaceblockhint =
2N/A                    curcspaceblk;
2N/A                hintcspaceblock = curcspaceblk;
2N/A                chead->mt_hint = 0;
2N/A                cacheindex = 0; /* Start from the beginning */
2N/A            }
2N/A            /* Now make the new cache */
2N/A            thisrequest = requestsize;
2N/A            buffer_size = (1 << logsz) + OVERHEAD;
2N/A            thiscache = (cache_t *)morecore(thisrequest * HUNKSIZE);
2N/A            if (thiscache == (cache_t *)-1) {
2N/A                if (locks_used)
2N/A                    (void) mutex_unlock(
2N/A                        &cpuptr->mt_parent_lock);
2N/A                errno = EAGAIN;
2N/A                return (NULL);
2N/A            }
2N/A            create_cache(thiscache, buffer_size, thisrequest,
2N/A                &hintcspaceblock->mt_nfree[cacheindex]);
2N/A            hintcspaceblock->mt_cache[cacheindex] = thiscache;
2N/A        } /* New cache created */
2N/A    }
2N/A
2N/A    /* thiscache now points to a cache with available space */
2N/A    sfreemask = thiscache->mt_freemask;
2N/A    efreemask = thiscache->mt_efreemask;
2N/A    offset = thiscache->mt_lastoffset;
2N/A
2N/A    freemask = &sfreemask[offset];
2N/A    if (*freemask == 0) {
2N/A        int looped = 0;
2N/A        /*
2N/A         * We allocate freemasks in multiples of four, so we can
2N/A         * unroll the search safely.  There should be at least
2N/A         * one set bit, so we should never abort().  We start
2N/A         * at a multiple of four near the last offset.
2N/A         */
2N/A        freemask = &sfreemask[((offset + 1) / 4) * 4];
2N/A        for (;;) {
2N/A            if (freemask >= efreemask) {
2N/A                /* make sure we only loop around once */
2N/A                freemask = sfreemask;
2N/A                if (looped) {
2N/A                /* no free bit found */
2N/A                    abort();
2N/A                }
2N/A                looped = 1;
2N/A            }
2N/A            if (*freemask++ != 0 || *freemask++ != 0 ||
2N/A                *freemask++ != 0 || *freemask++ != 0) {
2N/A                break; /* found free bit */
2N/A            }
2N/A        }
2N/A        freemask--; /* undo the post-increment */
2N/A        thiscache->mt_lastoffset = (freemask - sfreemask);
2N/A    }
2N/A
2N/A    /* find the lowest set bit by binary search */
2N/A    i = 0;
2N/A    mask = *freemask;
2N/A#ifdef _LP64
2N/A    if (!(mask & 0xffffffff))   { i += 32;  mask >>= 32; }
2N/A#endif
2N/A    if (!(mask & 0xffff))       { i += 16;  mask >>= 16; }
2N/A    if (!(mask & 0xff))     { i += 8;   mask >>= 8; }
2N/A    if (!(mask & 0xf))      { i += 4;   mask >>= 4; }
2N/A    if (!(mask & 0x3))      { i += 2;   mask >>= 2; }
2N/A    if (!(mask & 0x1))      { i += 1;   /* mask is unused */ }
2N/A
2N/A    /* load the value of reinit() before changing the free mask */
2N/A    old_reinit = reinit;
2N/A    dbg_pattern = (debugopt & MTDEBUGPATTERN);
2N/A
2N/A    /* and clear the bit, marking the buffer allocated */
2N/A    if (atomic_clear_long_excl(freemask, i) != 0) {
2N/A        /* someone already allocated buffer */
2N/A        abort();    /* someone already allocated it! */
2N/A    }
2N/A    (void) atomic_dec_uint(thiscache->mt_nfreeptr);
2N/A
2N/A    if (locks_used) {
2N/A        (void) mutex_unlock(&cpuptr->mt_parent_lock);
2N/A        if (dbg_pattern) {
2N/A            if (REINIT(old_reinit) || old_reinit != reinit) {
2N/A                dbg_pattern = 0;
2N/A            }
2N/A        }
2N/A    } else {
2N/A        /*
2N/A         * This comparison will fail if REINIT() could have been
2N/A         * true before we cleared the bit in freeblocks.  If it
2N/A         * could have been, the reinit thread may be in the middle
2N/A         * of processing our cache, and could have seen our block
2N/A         * as free.
2N/A         * If reinit is odd then we are in reinit_cpu_list
2N/A         * If reinit is even then we are NOT in reinit_cpu_list
2N/A         * If reinit is zero then we were never in reinit_cpu_list
2N/A         * REINIT() is true if odd, in reinit_cpu_list
2N/A         *
2N/A         * We must not return the buffer to our caller until the
2N/A         * reinit thread is guaranteed to not be touching it.  The
2N/A         * easiest way to do so is to grab and drop our percpu's
2N/A         * mt_parent lock.  Since the re-init thread holds the
2N/A         * percpu lock while it processes the cache, this guarantees
2N/A         * that either:
2N/A         *
2N/A         *  1. The reinit thread has not yet processed our percpu, or
2N/A         *  2. the reinit thread has already processed our percpu.
2N/A         *
2N/A         * In either case, he will not be overwriting our buffer,
2N/A         * so it is safe to return it to our caller.  Regardless,
2N/A         * since the initialization state of the buffer is not known,
2N/A         * we skip debug pattern checking.
2N/A         */
2N/A        if (REINIT(old_reinit) || old_reinit != reinit) {
2N/A            dbg_pattern = 0;
2N/A            (void) mutex_lock(&cpuptr->mt_parent_lock);
2N/A            (void) mutex_unlock(&cpuptr->mt_parent_lock);
2N/A        }
2N/A    }
2N/A
2N/A    n = thiscache->mt_size *
2N/A        (FREEMASK_BITS * (freemask - thiscache->mt_freemask) + i);
2N/A    /*
2N/A     * Now you have the offset in n, you've changed the free mask
2N/A     * in the freelist. Nothing left to do but find the block
2N/A     * in the arena and put the value of thiscache in the word
2N/A     * ahead of the handed out address and return the memory
2N/A     * back to the user.
2N/A     */
2N/A    ret = thiscache->mt_arena + n;
2N/A
2N/A    /* make sure we don't hand out memory not from this cache */
2N/A    assert(n < thiscache->mt_length);
2N/A
2N/A    *(uintptr_t *)ret = (uintptr_t)thiscache;
2N/A    ret += OVERHEAD;
2N/A
2N/A    assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
2N/A
2N/A    /*
2N/A     * the dbg_pattern is zero if we never called
2N/A     * mallocctl(MTDEBUGPATTERN
2N/A     * or if reinit was odd and we were not using locks
2N/A     * dbg_pattern is set if we were using locks and
2N/A     * mallocctl(MTDEBUGPATTERN)
2N/A     * has been called and mallocctl is not currently executing
2N/A     */
2N/A    if (dbg_pattern)
2N/A        if (verify_pattern(FREEPATTERN, ret, size)) {
2N/A            /* reference after free */
2N/A            abort();
2N/A        }
2N/A
2N/A    if (debugopt & MTINITBUFFER)
2N/A        copy_pattern(INITPATTERN, ret, size);
2N/A    return ((void *)ret);
2N/A}
2N/A
2N/Astatic void *
2N/Amorecore(size_t bytes)
2N/A{
2N/A    void * ret;
2N/A
2N/A    if (bytes > LONG_MAX) {
2N/A        intptr_t wad;
2N/A        /*
2N/A         * The request size is too big. We need to do this in
2N/A         * chunks. Sbrk only takes an int for an arg.
2N/A         */
2N/A        if (bytes == ULONG_MAX)
2N/A            return ((void *)-1);
2N/A
2N/A        ret = sbrk(0);
2N/A        wad = LONG_MAX;
2N/A        while (wad > 0) {
2N/A            if (sbrk(wad) == (void *)-1) {
2N/A                if (ret != sbrk(0))
2N/A                    (void) sbrk(-LONG_MAX);
2N/A                return ((void *)-1);
2N/A            }
2N/A            bytes -= LONG_MAX;
2N/A            wad = bytes;
2N/A        }
2N/A    } else
2N/A        ret = sbrk(bytes);
2N/A
2N/A    return (ret);
2N/A}
2N/A
2N/A
2N/Astatic void *
2N/Aoversize(size_t size)
2N/A{
2N/A    caddr_t ret;
2N/A    oversize_t *big;
2N/A
2N/A    /* make sure we will not overflow */
2N/A    if (size > MAX_MTMALLOC) {
2N/A        errno = ENOMEM;
2N/A        return (NULL);
2N/A    }
2N/A
2N/A    /*
2N/A     * Since we ensure every address we hand back is
2N/A     * MTMALLOC_MIN_ALIGN-byte aligned, ALIGNing size ensures that the
2N/A     * memory handed out is MTMALLOC_MIN_ALIGN-byte aligned at both ends.
2N/A     * This eases the implementation of MTDEBUGPATTERN and MTINITPATTERN,
2N/A     * particularly where coalescing occurs.
2N/A     */
2N/A    size = ALIGN(size, MTMALLOC_MIN_ALIGN);
2N/A
2N/A    /*
2N/A     * The idea with the global lock is that we are sure to
2N/A     * block in the kernel anyway since given an oversize alloc
2N/A     * we are sure to have to call morecore();
2N/A     */
2N/A    (void) mutex_lock(&oversize_lock);
2N/A
2N/A    if ((big = find_oversize(size)) != NULL) {
2N/A        if (REINIT(reinit) && (debugopt & MTDEBUGPATTERN))
2N/A            if (verify_pattern(FREEPATTERN, big->addr, size))
2N/A                abort();    /* reference after free */
2N/A    } else {
2N/A        /* Get more 8-byte aligned memory from heap */
2N/A        ret = morecore(size + OVSZ_HEADER_SIZE);
2N/A        if (ret == (caddr_t)-1) {
2N/A            (void) mutex_unlock(&oversize_lock);
2N/A            errno = ENOMEM;
2N/A            return (NULL);
2N/A        }
2N/A        big = oversize_header_alloc((uintptr_t)ret, size);
2N/A    }
2N/A    ret = big->addr;
2N/A
2N/A    /* Add big to the hash table at the head of the relevant bucket. */
2N/A    insert_hash(big);
2N/A
2N/A    if (debugopt & MTINITBUFFER)
2N/A        copy_pattern(INITPATTERN, ret, size);
2N/A
2N/A    (void) mutex_unlock(&oversize_lock);
2N/A    assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
2N/A    return ((void *)ret);
2N/A}
2N/A
2N/Astatic void
2N/Ainsert_oversize(oversize_t *op, oversize_t *nx)
2N/A{
2N/A    oversize_t *sp;
2N/A
2N/A    /* locate correct insertion point in size-ordered list */
2N/A    for (sp = oversize_list.next_bysize;
2N/A        sp != &oversize_list && (op->size > sp->size);
2N/A        sp = sp->next_bysize)
2N/A        ;
2N/A
2N/A    /* link into size-ordered list */
2N/A    op->next_bysize = sp;
2N/A    op->prev_bysize = sp->prev_bysize;
2N/A    op->prev_bysize->next_bysize = op;
2N/A    op->next_bysize->prev_bysize = op;
2N/A
2N/A    /*
2N/A     * link item into address-ordered list
2N/A     * (caller provides insertion point as an optimization)
2N/A     */
2N/A    op->next_byaddr = nx;
2N/A    op->prev_byaddr = nx->prev_byaddr;
2N/A    op->prev_byaddr->next_byaddr = op;
2N/A    op->next_byaddr->prev_byaddr = op;
2N/A
2N/A}
2N/A
2N/Astatic void
2N/Aunlink_oversize(oversize_t *lp)
2N/A{
2N/A    /* unlink from address list */
2N/A    lp->prev_byaddr->next_byaddr = lp->next_byaddr;
2N/A    lp->next_byaddr->prev_byaddr = lp->prev_byaddr;
2N/A
2N/A    /* unlink from size list */
2N/A    lp->prev_bysize->next_bysize = lp->next_bysize;
2N/A    lp->next_bysize->prev_bysize = lp->prev_bysize;
2N/A}
2N/A
2N/Astatic void
2N/Aposition_oversize_by_size(oversize_t *op)
2N/A{
2N/A    oversize_t *sp;
2N/A
2N/A    if (op->size > op->next_bysize->size ||
2N/A        op->size < op->prev_bysize->size) {
2N/A
2N/A        /* unlink from size list */
2N/A        op->prev_bysize->next_bysize = op->next_bysize;
2N/A        op->next_bysize->prev_bysize = op->prev_bysize;
2N/A
2N/A        /* locate correct insertion point in size-ordered list */
2N/A        for (sp = oversize_list.next_bysize;
2N/A            sp != &oversize_list && (op->size > sp->size);
2N/A            sp = sp->next_bysize)
2N/A            ;
2N/A
2N/A        /* link into size-ordered list */
2N/A        op->next_bysize = sp;
2N/A        op->prev_bysize = sp->prev_bysize;
2N/A        op->prev_bysize->next_bysize = op;
2N/A        op->next_bysize->prev_bysize = op;
2N/A    }
2N/A}
2N/A
2N/Astatic void
2N/Aadd_oversize(oversize_t *lp)
2N/A{
2N/A    int merge_flags = INSERT_ONLY;
2N/A    oversize_t *nx;     /* ptr to item right of insertion point */
2N/A    oversize_t *pv;     /* ptr to item left of insertion point */
2N/A    uint_t size_lp, size_pv, size_nx;
2N/A    uintptr_t endp_lp, endp_pv, endp_nx;
2N/A
2N/A    /*
2N/A     * Locate insertion point in address-ordered list
2N/A     */
2N/A
2N/A    for (nx = oversize_list.next_byaddr;
2N/A        nx != &oversize_list && (lp->addr > nx->addr);
2N/A        nx = nx->next_byaddr)
2N/A        ;
2N/A
2N/A    /*
2N/A     * Determine how to add chunk to oversize freelist
2N/A     */
2N/A
2N/A    size_lp = OVSZ_HEADER_SIZE + lp->size;
2N/A    endp_lp = ALIGN((uintptr_t)lp + size_lp, MTMALLOC_MIN_ALIGN);
2N/A    size_lp = endp_lp - (uintptr_t)lp;
2N/A
2N/A    pv = nx->prev_byaddr;
2N/A
2N/A    if (pv->size) {
2N/A
2N/A        size_pv = OVSZ_HEADER_SIZE + pv->size;
2N/A        endp_pv = ALIGN((uintptr_t)pv + size_pv,
2N/A            MTMALLOC_MIN_ALIGN);
2N/A        size_pv = endp_pv - (uintptr_t)pv;
2N/A
2N/A        /* Check for adjacency with left chunk */
2N/A        if ((uintptr_t)lp == endp_pv)
2N/A            merge_flags |= COALESCE_LEFT;
2N/A    }
2N/A
2N/A    if (nx->size) {
2N/A
2N/A        /* Check for adjacency with right chunk */
2N/A        if ((uintptr_t)nx == endp_lp) {
2N/A            size_nx = OVSZ_HEADER_SIZE + nx->size;
2N/A            endp_nx = ALIGN((uintptr_t)nx + size_nx,
2N/A                MTMALLOC_MIN_ALIGN);
2N/A            size_nx = endp_nx - (uintptr_t)nx;
2N/A            merge_flags |= COALESCE_RIGHT;
2N/A        }
2N/A    }
2N/A
2N/A    /*
2N/A     * If MTDEBUGPATTERN==1, lp->addr will have been overwritten with
2N/A     * FREEPATTERN for lp->size bytes. If we can merge, the oversize
2N/A     * header(s) that will also become part of the memory available for
2N/A     * reallocation (ie lp and/or nx) must also be overwritten with
2N/A     * FREEPATTERN or we will SIGABRT when this memory is next reallocated.
2N/A     */
2N/A    switch (merge_flags) {
2N/A
2N/A    case INSERT_ONLY:       /* Coalescing not possible */
2N/A        insert_oversize(lp, nx);
2N/A        break;
2N/A    case COALESCE_LEFT:
2N/A        pv->size += size_lp;
2N/A        position_oversize_by_size(pv);
2N/A        if (debugopt & MTDEBUGPATTERN)
2N/A            copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
2N/A        break;
2N/A    case COALESCE_RIGHT:
2N/A        unlink_oversize(nx);
2N/A        lp->size += size_nx;
2N/A        insert_oversize(lp, pv->next_byaddr);
2N/A        if (debugopt & MTDEBUGPATTERN)
2N/A            copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
2N/A        break;
2N/A    case COALESCE_WITH_BOTH_SIDES:  /* Merge (with right) to the left */
2N/A        pv->size += size_lp + size_nx;
2N/A        unlink_oversize(nx);
2N/A        position_oversize_by_size(pv);
2N/A        if (debugopt & MTDEBUGPATTERN) {
2N/A            copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
2N/A            copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
2N/A        }
2N/A        break;
2N/A    }
2N/A}
2N/A
2N/A/*
2N/A * Find memory on our list that is at least size big. If we find a block that is
2N/A * big enough, we break it up and return the associated oversize_t struct back
2N/A * to the calling client. Any leftover piece of that block is returned to the
2N/A * freelist.
2N/A */
2N/Astatic oversize_t *
2N/Afind_oversize(size_t size)
2N/A{
2N/A    oversize_t *wp = oversize_list.next_bysize;
2N/A    while (wp != &oversize_list && size > wp->size)
2N/A        wp = wp->next_bysize;
2N/A
2N/A    if (wp == &oversize_list) /* empty list or nothing big enough */
2N/A        return (NULL);
2N/A    /* breaking up a chunk of memory */
2N/A    if ((long)((wp->size - (size + OVSZ_HEADER_SIZE + MTMALLOC_MIN_ALIGN)))
2N/A        > MAX_CACHED) {
2N/A        caddr_t off;
2N/A        oversize_t *np;
2N/A        size_t osize;
2N/A        off = (caddr_t)ALIGN(wp->addr + size,
2N/A            MTMALLOC_MIN_ALIGN);
2N/A        osize = wp->size;
2N/A        wp->size = (size_t)(off - wp->addr);
2N/A        np = oversize_header_alloc((uintptr_t)off,
2N/A            osize - (wp->size + OVSZ_HEADER_SIZE));
2N/A        if ((long)np->size < 0)
2N/A            abort();
2N/A        unlink_oversize(wp);
2N/A        add_oversize(np);
2N/A    } else {
2N/A        unlink_oversize(wp);
2N/A    }
2N/A    return (wp);
2N/A}
2N/A
2N/Astatic void
2N/Acopy_pattern(uint32_t pattern, void *buf_arg, size_t size)
2N/A{
2N/A    uint32_t *bufend = (uint32_t *)((char *)buf_arg + size);
2N/A    uint32_t *buf = buf_arg;
2N/A
2N/A    while (buf < bufend - 3) {
2N/A        buf[3] = buf[2] = buf[1] = buf[0] = pattern;
2N/A        buf += 4;
2N/A    }
2N/A    while (buf < bufend)
2N/A        *buf++ = pattern;
2N/A}
2N/A
2N/Astatic void *
2N/Averify_pattern(uint32_t pattern, void *buf_arg, size_t size)
2N/A{
2N/A    uint32_t *bufend = (uint32_t *)((char *)buf_arg + size);
2N/A    uint32_t *buf;
2N/A
2N/A    for (buf = buf_arg; buf < bufend; buf++)
2N/A        if (*buf != pattern)
2N/A            return (buf);
2N/A    return (NULL);
2N/A}
2N/A
2N/Astatic void
2N/Afree_oversize(oversize_t *ovp, int alloced)
2N/A{
2N/A    assert(((uintptr_t)ovp->addr & 7) == 0); /* are we 8 byte aligned */
2N/A    assert(ovp->size > MAX_CACHED);
2N/A
2N/A    ovp->next_bysize = ovp->prev_bysize = NULL;
2N/A    ovp->next_byaddr = ovp->prev_byaddr = NULL;
2N/A    (void) mutex_lock(&oversize_lock);
2N/A    if (alloced)
2N/A        (void) remove_hash(ovp);
2N/A    add_oversize(ovp);
2N/A    (void) mutex_unlock(&oversize_lock);
2N/A}
2N/A
2N/Astatic oversize_t *
2N/Aoversize_header_alloc(uintptr_t mem, size_t size)
2N/A{
2N/A    oversize_t *ovsz_hdr;
2N/A
2N/A    assert(size > MAX_CACHED);
2N/A
2N/A    ovsz_hdr = (oversize_t *)mem;
2N/A    ovsz_hdr->prev_bysize = NULL;
2N/A    ovsz_hdr->next_bysize = NULL;
2N/A    ovsz_hdr->prev_byaddr = NULL;
2N/A    ovsz_hdr->next_byaddr = NULL;
2N/A    ovsz_hdr->hash_next = NULL;
2N/A    ovsz_hdr->size = size;
2N/A    mem += OVSZ_SIZE;
2N/A    *(uintptr_t *)mem = MTMALLOC_OVERSIZE_MAGIC;
2N/A    mem += OVERHEAD;
2N/A    assert(((uintptr_t)mem & 7) == 0); /* are we 8 byte aligned */
2N/A    ovsz_hdr->addr = (caddr_t)mem;
2N/A    return (ovsz_hdr);
2N/A}
2N/A
2N/Astatic void
2N/Amalloc_prepare()
2N/A{
2N/A    percpu_t *cpuptr;
2N/A
2N/A    (void) mutex_lock(&oversize_lock);
2N/A    for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
2N/A        (void) mutex_lock(&cpuptr->mt_parent_lock);
2N/A    }
2N/A}
2N/A
2N/Astatic void
2N/Amalloc_release()
2N/A{
2N/A    percpu_t *cpuptr;
2N/A
2N/A    for (cpuptr = &cpu_list[ncpus - 1]; cpuptr >= &cpu_list[0]; cpuptr--) {
2N/A        (void) mutex_unlock(&cpuptr->mt_parent_lock);
2N/A    }
2N/A    (void) mutex_unlock(&oversize_lock);
2N/A}
2N/A
2N/A#pragma init(malloc_init)
2N/Astatic void
2N/Amalloc_init(void)
2N/A{
2N/A    /*
2N/A     * This works in the init section for this library
2N/A     * because setup_caches() doesn't call anything in libc
2N/A     * that calls malloc().  If it did, disaster would ensue.
2N/A     *
2N/A     * For this to work properly, this library must be the first
2N/A     * one to have its init section called (after libc) by the
2N/A     * dynamic linker.  If some other library's init section
2N/A     * ran first and called malloc(), disaster would ensue.
2N/A     * Because this is an interposer library for malloc(), the
2N/A     * dynamic linker arranges for its init section to run first.
2N/A     */
2N/A    int i;
2N/A    i = setup_caches();
2N/A    if (i == 0)
2N/A        abort();
2N/A
2N/A    (void) pthread_atfork(malloc_prepare, malloc_release, malloc_release);
2N/A}