memcache.cpp revision 36f105528f08a78e2eff142de3d160b66dffd496
661bfa5aae55ac2f94fa1cb131ea2323e5f6e633vboxsync * IPRT - Memory Object Allocation Cache.
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * Copyright (C) 2006-2012 Oracle Corporation
29099c2d04b11e614f1fa399fab9e9162f2788b9vboxsync * This file is part of VirtualBox Open Source Edition (OSE), as
29099c2d04b11e614f1fa399fab9e9162f2788b9vboxsync * available from http://www.virtualbox.org. This file is free software;
29099c2d04b11e614f1fa399fab9e9162f2788b9vboxsync * you can redistribute it and/or modify it under the terms of the GNU
29099c2d04b11e614f1fa399fab9e9162f2788b9vboxsync * General Public License (GPL) as published by the Free Software
29099c2d04b11e614f1fa399fab9e9162f2788b9vboxsync * Foundation, in version 2 as it comes in the "COPYING" file of the
29099c2d04b11e614f1fa399fab9e9162f2788b9vboxsync * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
29099c2d04b11e614f1fa399fab9e9162f2788b9vboxsync * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
29099c2d04b11e614f1fa399fab9e9162f2788b9vboxsync * The contents of this file may alternatively be used under the terms
29099c2d04b11e614f1fa399fab9e9162f2788b9vboxsync * of the Common Development and Distribution License Version 1.0
29099c2d04b11e614f1fa399fab9e9162f2788b9vboxsync * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
29099c2d04b11e614f1fa399fab9e9162f2788b9vboxsync * VirtualBox OSE distribution, in which case the provisions of the
29099c2d04b11e614f1fa399fab9e9162f2788b9vboxsync * CDDL are applicable instead of those of the GPL.
661bfa5aae55ac2f94fa1cb131ea2323e5f6e633vboxsync * You may elect to license modified versions of this file under the
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * terms and conditions of either the GPL or the CDDL or both.
29099c2d04b11e614f1fa399fab9e9162f2788b9vboxsync/*******************************************************************************
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync* Header Files *
29099c2d04b11e614f1fa399fab9e9162f2788b9vboxsync*******************************************************************************/
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync/*******************************************************************************
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync* Structures and Typedefs *
5c4d7e2aae42bbf39793dfa686925f076a56b4d5vboxsync*******************************************************************************/
5c4d7e2aae42bbf39793dfa686925f076a56b4d5vboxsync/** Pointer to a cache instance. */
5c4d7e2aae42bbf39793dfa686925f076a56b4d5vboxsync/** Pointer to a cache page. */
5c4d7e2aae42bbf39793dfa686925f076a56b4d5vboxsync * A free object.
29099c2d04b11e614f1fa399fab9e9162f2788b9vboxsync * @remarks This only works if the objects don't have a constructor or
5c4d7e2aae42bbf39793dfa686925f076a56b4d5vboxsync * destructor and are big enough.
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /** Pointer to the next free object */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync/** Pointer to a free object. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * A cache page.
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync * This is a page of memory that we split up in to a bunch object sized chunks
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * and hand out to the cache users. The bitmap is updated in an atomic fashion
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * so that we don't have to take any locks when freeing or allocating memory.
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /** Pointer to the cache owning this page.
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * This is used for validation purposes only. */
c66c4413faa5a72ce047742f9acfa85e94dec8afvboxsync /** Pointer to the next page.
c66c4413faa5a72ce047742f9acfa85e94dec8afvboxsync * This is marked as volatile since we'll be adding new entries to the list
c66c4413faa5a72ce047742f9acfa85e94dec8afvboxsync * without taking any locks. */
5c4d7e2aae42bbf39793dfa686925f076a56b4d5vboxsync /** Bitmap tracking allocated blocks. */
5c4d7e2aae42bbf39793dfa686925f076a56b4d5vboxsync void volatile *pbmAlloc;
5c4d7e2aae42bbf39793dfa686925f076a56b4d5vboxsync /** Bitmap tracking which blocks that has been thru the constructor. */
5c4d7e2aae42bbf39793dfa686925f076a56b4d5vboxsync void volatile *pbmCtor;
5c4d7e2aae42bbf39793dfa686925f076a56b4d5vboxsync /** Pointer to the object array.. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /** The number of objects on this page. */
0c94a8282c9042b02f022302a3d987746140eab9vboxsync /** Padding to force cFree into the next cache line. (ASSUMES CL = 64) */
0c94a8282c9042b02f022302a3d987746140eab9vboxsync uint8_t abPadding[ARCH_BITS == 32 ? 64 - 6*4 : 64 - 5*8 - 4];
0c94a8282c9042b02f022302a3d987746140eab9vboxsync /** The number of free objects. */
0c94a8282c9042b02f022302a3d987746140eab9vboxsyncAssertCompileMemberOffset(RTMEMCACHEPAGE, cFree, 64);
0c94a8282c9042b02f022302a3d987746140eab9vboxsync * Memory object cache instance.
0c94a8282c9042b02f022302a3d987746140eab9vboxsync /** Magic value (RTMEMCACHE_MAGIC). */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /** The object size. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /** Object alignment. */
0c94a8282c9042b02f022302a3d987746140eab9vboxsync /** The per page object count. */
0c94a8282c9042b02f022302a3d987746140eab9vboxsync /** Number of bits in the bitmap.
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * @remarks This is higher or equal to cPerPage and it is aligned such that
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * the search operation will be most efficient on x86/AMD64. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /** The maximum number of objects. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /** Whether to the use the free list or not. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /** Head of the page list. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /** Constructor callback. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /** Destructor callback. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /** Callback argument. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /** Critical section serializing page allocation and similar. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /** The total object count. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /** The number of free objects. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /** This may point to a page with free entries. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /** Stack of free items.
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * These are marked as used in the allocation bitmaps.
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * @todo This doesn't scale well when several threads are beating on the
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * cache. Also, it totally doesn't work when the objects are too
f9cdd92d151d9c28eb0f1aed25863fc04f85691dvboxsync * small. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsyncRTDECL(int) RTMemCacheCreate(PRTMEMCACHE phMemCache, size_t cbObject, size_t cbAlignment, uint32_t cMaxObjects,
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync PFNMEMCACHECTOR pfnCtor, PFNMEMCACHEDTOR pfnDtor, void *pvUser, uint32_t fFlags)
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync AssertReturn(!pfnDtor || pfnCtor, VERR_INVALID_PARAMETER);
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync AssertReturn(cbObject > 0, VERR_INVALID_PARAMETER);
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync AssertReturn(cbObject <= PAGE_SIZE / 8, VERR_INVALID_PARAMETER);
0c94a8282c9042b02f022302a3d987746140eab9vboxsync AssertReturn(!((cbAlignment - 1) & cbAlignment), VERR_NOT_POWER_OF_TWO);
0c94a8282c9042b02f022302a3d987746140eab9vboxsync AssertReturn(cbAlignment <= 64, VERR_OUT_OF_RANGE);
0c94a8282c9042b02f022302a3d987746140eab9vboxsync * Allocate and initialize the instance memory.
0c94a8282c9042b02f022302a3d987746140eab9vboxsync RTMEMCACHEINT *pThis = (RTMEMCACHEINT *)RTMemAlloc(sizeof(*pThis));
0c94a8282c9042b02f022302a3d987746140eab9vboxsync pThis->cbObject = (uint32_t)RT_ALIGN_Z(cbObject, cbAlignment);
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync pThis->cPerPage = (uint32_t)((PAGE_SIZE - RT_ALIGN_Z(sizeof(RTMEMCACHEPAGE), cbAlignment)) / pThis->cbObject);
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync pThis->fUseFreeList = cbObject >= sizeof(RTMEMCACHEFREEOBJ)
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * Here is a puzzler (or maybe I'm just blind), the free list code breaks
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync * badly on my macbook pro (i7) (32-bit).
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync * I tried changing the reads from unordered to ordered to no avail. Then I
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync * tried optimizing the code with the ASMAtomicCmpXchgExPtr function to
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync * avoid some reads - no change. Inserting pause instructions did nothing
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync * (as expected). The only thing which seems to make a difference is
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync * reading the pFreeTop pointer twice in the free code... This is weird or I'm
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync * overlooking something..
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync * No time to figure it out, so I'm disabling the broken code paths for
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync AssertReturn(pThis->u32Magic == RTMEMCACHE_MAGIC, VERR_INVALID_HANDLE);
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync for (PRTMEMCACHEFREEOBJ pFree = pThis->pFreeTop; pFree && cFree < pThis->cTotal + 5; pFree = pFree->pNext)
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync AssertMsg(cFree == pThis->cTotal, ("cFree=%u cTotal=%u\n", cFree, pThis->cTotal));
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync * Destroy it.
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync AssertReturn(ASMAtomicCmpXchgU32(&pThis->u32Magic, RTMEMCACHE_MAGIC_DEAD, RTMEMCACHE_MAGIC), VERR_INVALID_HANDLE);
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync while (iObj-- > 0)
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync pThis->pfnDtor(hMemCache, pPage->pbObjects + iObj * pThis->cbObject, pThis->pvUser);
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync * Grows the cache.
c66c4413faa5a72ce047742f9acfa85e94dec8afvboxsync * @returns IPRT status code.
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync * @param pThis The memory cache instance.
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * Enter the critical section here to avoid allocation races leading to
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * wasted memory (++) and make it easier to link in the new page.
c66c4413faa5a72ce047742f9acfa85e94dec8afvboxsync * Allocate and initialize the new page.
c66c4413faa5a72ce047742f9acfa85e94dec8afvboxsync * We put the constructor bitmap at the lower end right after cFree.
c66c4413faa5a72ce047742f9acfa85e94dec8afvboxsync * We then push the object array to the end of the page and place the
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * allocation bitmap below it. The hope is to increase the chance that
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * the allocation bitmap is in a different cache line than cFree since
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * this increases performance markably when lots of threads are beating
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync * on the cache.
29099c2d04b11e614f1fa399fab9e9162f2788b9vboxsync PRTMEMCACHEPAGE pPage = (PRTMEMCACHEPAGE)RTMemPageAlloc(PAGE_SIZE);
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync uint32_t const cObjects = RT_MIN(pThis->cPerPage, pThis->cMax - pThis->cTotal);
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync pb = (uint8_t *)pPage + PAGE_SIZE - pThis->cbObject * cObjects;
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync pPage->pbObjects = pb; Assert(RT_ALIGN_P(pb, pThis->cbAlignment) == pb);
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync Assert((uintptr_t)pPage->pbmCtor + pThis->cBits / 8 <= (uintptr_t)pPage->pbmAlloc);
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /* Mark the bitmap padding and any unused objects as allocated. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync for (uint32_t iBit = cObjects; iBit < pThis->cBits; iBit++)
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /* Make it the hint. */
0c69348b58bb8eabb1bea8867ee932b667bd0d34vboxsync /* Link the page. */
c66c4413faa5a72ce047742f9acfa85e94dec8afvboxsync /* Add it to the page counts. */
66b58af085e22ee26be57f98127fb49ee2e91790vboxsync * Grabs a an object in a page.
66b58af085e22ee26be57f98127fb49ee2e91790vboxsync * @returns New cFree value on success (0 or higher), -1 on failure.
66b58af085e22ee26be57f98127fb49ee2e91790vboxsync * @param pPage Pointer to the page.
66b58af085e22ee26be57f98127fb49ee2e91790vboxsyncDECL_FORCE_INLINE(int32_t) rtMemCacheGrabObj(PRTMEMCACHEPAGE pPage)
c66c4413faa5a72ce047742f9acfa85e94dec8afvboxsyncRTDECL(int) RTMemCacheAllocEx(RTMEMCACHE hMemCache, void **ppvObj)
66b58af085e22ee26be57f98127fb49ee2e91790vboxsync AssertReturn(pThis->u32Magic == RTMEMCACHE_MAGIC, VERR_INVALID_PARAMETER);
66b58af085e22ee26be57f98127fb49ee2e91790vboxsync * Try grab a free object from the stack.
66b58af085e22ee26be57f98127fb49ee2e91790vboxsync PRTMEMCACHEFREEOBJ pObj = ASMAtomicUoReadPtrT(&pThis->pFreeTop, PRTMEMCACHEFREEOBJ);
0c94a8282c9042b02f022302a3d987746140eab9vboxsync PRTMEMCACHEFREEOBJ pNext = ASMAtomicUoReadPtrT(&pObj->pNext, PRTMEMCACHEFREEOBJ);
0c94a8282c9042b02f022302a3d987746140eab9vboxsync if (ASMAtomicCmpXchgExPtr(&pThis->pFreeTop, pNext, pObj, &pObjOld))
c66c4413faa5a72ce047742f9acfa85e94dec8afvboxsync * Try grab a free object at the cache level.
c66c4413faa5a72ce047742f9acfa85e94dec8afvboxsync uint32_t cTotal = ASMAtomicUoReadU32(&pThis->cTotal);
66b58af085e22ee26be57f98127fb49ee2e91790vboxsync * Grab a free object at the page level.
66b58af085e22ee26be57f98127fb49ee2e91790vboxsync PRTMEMCACHEPAGE pPage = ASMAtomicReadPtrT(&pThis->pPageHint, PRTMEMCACHEPAGE);
66b58af085e22ee26be57f98127fb49ee2e91790vboxsync int32_t iObj = pPage ? rtMemCacheGrabObj(pPage) : -1;
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync for (pPage = pThis->pPageHead; pPage; pPage = pPage->pNext)
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync * Find a free object in the allocation bitmap. Use the new cFree count
661bfa5aae55ac2f94fa1cb131ea2323e5f6e633vboxsync * as a hint.
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync iObj = ASMBitFirstClear(pPage->pbmAlloc, pThis->cBits);
661bfa5aae55ac2f94fa1cb131ea2323e5f6e633vboxsync if (!ASMAtomicBitTestAndSet(pPage->pbmAlloc, iObj))
661bfa5aae55ac2f94fa1cb131ea2323e5f6e633vboxsync void *pvObj = &pPage->pbObjects[iObj * pThis->cbObject];
661bfa5aae55ac2f94fa1cb131ea2323e5f6e633vboxsync Assert((uintptr_t)pvObj - (uintptr_t)pPage < PAGE_SIZE);
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync * Call the constructor?
661bfa5aae55ac2f94fa1cb131ea2323e5f6e633vboxsync int rc = pThis->pfnCtor(hMemCache, pvObj, pThis->pvUser);
661bfa5aae55ac2f94fa1cb131ea2323e5f6e633vboxsyncRTDECL(void *) RTMemCacheAlloc(RTMEMCACHE hMemCache)
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsyncRTDECL(void) RTMemCacheFree(RTMEMCACHE hMemCache, void *pvObj)
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync AssertReturnVoid(pThis->u32Magic == RTMEMCACHE_MAGIC);
c66c4413faa5a72ce047742f9acfa85e94dec8afvboxsync Assert(RT_ALIGN_P(pvObj, pThis->cbAlignment) == pvObj);
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync /* This is the same as the other branch, except it's not actually freed. */
661bfa5aae55ac2f94fa1cb131ea2323e5f6e633vboxsync PRTMEMCACHEPAGE pPage = (PRTMEMCACHEPAGE)(((uintptr_t)pvObj) & ~(uintptr_t)PAGE_OFFSET_MASK);
fdb40b7d2efa84fc6f03b7a695cb4b2e035c30c7vboxsync Assert(ASMAtomicUoReadS32(&pPage->cFree) < (int32_t)pThis->cPerPage);
c66c4413faa5a72ce047742f9acfa85e94dec8afvboxsync uintptr_t offObj = (uintptr_t)pvObj - (uintptr_t)pPage->pbObjects;
c66c4413faa5a72ce047742f9acfa85e94dec8afvboxsync AssertReturnVoid(ASMBitTest(pPage->pbmAlloc, (int32_t)iObj));
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync * Push it onto the free stack.
0c94a8282c9042b02f022302a3d987746140eab9vboxsync PRTMEMCACHEFREEOBJ pObj = (PRTMEMCACHEFREEOBJ)pvObj;
c66c4413faa5a72ce047742f9acfa85e94dec8afvboxsync PRTMEMCACHEFREEOBJ pNext = ASMAtomicUoReadPtrT(&pThis->pFreeTop, PRTMEMCACHEFREEOBJ);
0c94a8282c9042b02f022302a3d987746140eab9vboxsync while (!ASMAtomicCmpXchgExPtr(&pThis->pFreeTop, pObj, pNext, &pFreeTopOld))
66b58af085e22ee26be57f98127fb49ee2e91790vboxsync /* Note: Do *NOT* attempt to poison the object! */
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync * Find the cache page. The page structure is at the start of the page.
661bfa5aae55ac2f94fa1cb131ea2323e5f6e633vboxsync PRTMEMCACHEPAGE pPage = (PRTMEMCACHEPAGE)(((uintptr_t)pvObj) & ~(uintptr_t)PAGE_OFFSET_MASK);
c66c4413faa5a72ce047742f9acfa85e94dec8afvboxsync Assert(ASMAtomicUoReadS32(&pPage->cFree) < (int32_t)pThis->cPerPage);
66b58af085e22ee26be57f98127fb49ee2e91790vboxsync * Clear the bitmap bit and update the two object counter. Order matters!
19320d55d1417c39b3b5673a53aaa5ef177242c8vboxsync uintptr_t offObj = (uintptr_t)pvObj - (uintptr_t)pPage->pbObjects;