/* $Id$ */
/** @file
* IPRT - String Cache.
*/
/*
* Copyright (C) 2009-2013 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* The contents of this file may alternatively be used under the terms
* of the Common Development and Distribution License Version 1.0
* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
* VirtualBox OSE distribution, in which case the provisions of the
* CDDL are applicable instead of those of the GPL.
*
* You may elect to license modified versions of this file under the
* terms and conditions of either the GPL or the CDDL or both.
*/
/*******************************************************************************
* Header Files *
*******************************************************************************/
#include <iprt/strcache.h>
#include <iprt/critsect.h>
/*******************************************************************************
* Defined Constants And Macros *
*******************************************************************************/
/** Special NIL pointer for the hash table. It differs from NULL in that it is
* a valid hash table entry when doing a lookup. */
/** Calcuates the increment when handling a collision.
* The current formula makes sure it's always odd so we cannot possibly end
* up a cyclic loop with an even sized table. It also takes more bits from
* the length part. */
/** The initial hash table size. Must be power of two. */
/** The hash table growth factor. */
/**
* The RTSTRCACHEENTRY size threshold at which we stop using our own allocator
* and switch to the application heap, expressed as a power of two.
*
* Using a 1KB as a reasonable limit here.
*/
#else
#endif
/** The RTSTRCACHE_HEAP_THRESHOLD_BIT as a byte limit. */
/** Big (heap) entry size alignment. */
/**
* The RTSTRCACHEENTRY size threshold at which we start using the merge free
* list for allocations, expressed as a power of two.
*/
/** The number of bytes (power of two) that the merged allocation lists should
* be grown by. Must be much greater than RTSTRCACHE_MERGED_THRESHOLD. */
#endif
/** The number of bytes (power of two) that the fixed allocation lists should
* be grown by. */
/** The number of fixed sized lists. */
/** Validates a string cache handle, translating RTSTRCACHE_DEFAULT when found,
* and returns rc if not valid. */
do { \
if ((pStrCache) == RTSTRCACHE_DEFAULT) \
{ \
if (RT_FAILURE(rcOnce)) \
return (rc); \
(pStrCache) = g_hrtStrCacheDefault; \
} \
else \
{ \
} \
} while (0)
/*******************************************************************************
* Structures and Typedefs *
*******************************************************************************/
/**
* String cache entry.
*/
typedef struct RTSTRCACHEENTRY
{
/** The number of references. */
/** The lower 16-bit hash value. */
/** The string length (excluding the terminator).
* If this is set to RTSTRCACHEENTRY_BIG_LEN, this is a BIG entry
* (RTSTRCACHEBIGENTRY). */
/** The string. */
/** Pointer to a string cache entry. */
/** Pointer to a const string cache entry. */
/** RTSTCACHEENTRY::cchString value for big cache entries. */
/**
* Big string cache entry.
*
* These are allocated individually from the application heap.
*/
typedef struct RTSTRCACHEBIGENTRY
{
/** List entry. */
/** The string length. */
/** The full hash value / padding. */
/** The core entry. */
/** Pointer to a big string cache entry. */
/** Pointer to a const big string cache entry. */
/**
* A free string cache entry.
*/
typedef struct RTSTRCACHEFREE
{
/** Zero value indicating that it's a free entry (no refs, no hash). */
/** Number of free bytes. Only used for > 32 byte allocations. */
/** Pointer to the next free item. */
/** Pointer to a free string cache entry. */
/**
* A free string cache entry with merging.
*
* This differs from RTSTRCACHEFREE only in having a back pointer for more
* efficient list management (doubly vs. singly linked lists).
*/
typedef struct RTSTRCACHEFREEMERGE
{
/** Marker that indicates what kind of entry this is, either . */
/** Number of free bytes. Only used for > 32 byte allocations. */
/** Pointer to the main node. NULL for main nodes. */
/** The free list entry. */
/** Pads the size up to the minimum allocation unit for the merge list.
* This both defines the minimum allocation unit and simplifies pointer
* manipulation during merging and splitting. */
/** Pointer to a free cache string in the merge list. */
/** RTSTRCACHEFREEMERGE::uMarker value indicating that it's the real free chunk
* header. Must be something that's invalid UTF-8 for both little and big
* endian system. */
/** RTSTRCACHEFREEMERGE::uMarker value indicating that it's part of a larger
* chunk of free memory. Must be something that's invalid UTF-8 for both little
* and big endian system. */
#endif /* RTSTRCACHE_WITH_MERGED_ALLOCATOR */
/**
* Tracking structure chunk of memory used by the 16 byte or 32 byte
* allocations.
*
* This occupies the first entry in the chunk.
*/
typedef struct RTSTRCACHECHUNK
{
/** The size of the chunk. */
/** Pointer to the next chunk. */
/** Pointer to the chunk tracking structure. */
/**
* Cache instance data.
*/
typedef struct RTSTRCACHEINT
{
/** The string cache magic (RTSTRCACHE_MAGIC). */
/** Ref counter for the cache handle. */
/** The number of strings currently entered in the cache. */
/** The size of the hash table. */
/** Pointer to the hash table. */
/** Free list for allocations of the sizes defined by g_acbFixedLists. */
/** Free lists based on */
#endif
/** List of allocated memory chunks. */
/** List of big cache entries. */
/** @name Statistics
* @{ */
/** The total size of all chunks. */
/** The total length of all the strings, terminators included. */
/** The total size of all the big entries. */
/** Hash collisions. */
/** Secondary hash collisions. */
/** The number of inserts to compare cHashCollisions to. */
/** The number of rehashes. */
/** @} */
/** Critical section protecting the cache structures. */
/** Pointer to a cache instance. */
/*******************************************************************************
* Global Variables *
*******************************************************************************/
/** The entry sizes of the fixed lists (RTSTRCACHEINT::apFreeLists). */
{
16, 32, 48, 64, 96, 128, 192, 256, 320, 384, 448, 512
};
/** Init once for the default string cache. */
/** The default string cache. */
/** @callback_method_impl{FNRTONCE, Initializes g_hrtStrCacheDefault} */
{
}
{
if (pThis)
{
if (pThis->papHashTab)
{
if (RT_SUCCESS(rc))
{
#endif
*phStrCache = pThis;
return VINF_SUCCESS;
}
}
}
return rc;
}
{
if ( hStrCache == NIL_RTSTRCACHE
|| hStrCache == RTSTRCACHE_DEFAULT)
return VINF_SUCCESS;
/*
* Invalidate it. Enter the crit sect just to be on the safe side.
*/
AssertReturn(ASMAtomicCmpXchgU32(&pThis->u32Magic, RTSTRCACHE_MAGIC_DEAD, RTSTRCACHE_MAGIC), VERR_INVALID_HANDLE);
{
}
{
}
return VINF_SUCCESS;
}
/**
* Selects the fixed free list index for a given minimum entry size.
*
* @returns Free list index.
* @param cbMin Minimum entry size.
*/
{
unsigned i = 0;
while (cbMin > g_acbFixedLists[i])
i++;
return i;
}
#ifdef RT_STRICT
/**
* Internal cache check.
*/
{
# ifdef RTSTRCACHE_WITH_MERGED_ALLOCATOR
{
{
}
}
# endif
}
#else
#endif
/**
* Finds the first empty hash table entry given a hash+length value.
*
* ASSUMES that the hash table isn't full.
*
* @returns Hash table index.
* @param pThis The string cache instance.
* @param uHashLen The hash + length (not RTSTRCACHEENTRY_BIG_LEN).
*/
{
for (;;)
{
return iHash;
/* Advance. */
}
}
/**
* Grows the hash table.
*
* @returns vINF_SUCCESS or VERR_NO_MEMORY.
* @param pThis The string cache instance.
*/
{
/*
* Allocate a new hash table two times the size of the old one.
*/
return VERR_NO_MEMORY;
/*
* Install the new table and move the items from the old table and into the new one.
*/
while (iOld-- > 0)
{
{
if (cchString == RTSTRCACHEENTRY_BIG_LEN)
}
}
/*
* Free the old hash table.
*/
return VINF_SUCCESS;
}
/**
*
* @param pThis The string cache instance.
* @param pFree The free string entry.
*/
{
}
/**
* Allocate a cache entry from the merged free lists.
*
* @returns Pointer to the cache entry on success, NULL on allocation error.
* @param pThis The string cache instance.
* @param uHash The full hash of the string.
* @param pchString The string.
* @param cchString The string length.
* @param cbEntry The required entry size.
*/
{
/*
* Search the list heads first.
*/
if (!RT_IS_POWER_OF_TWO(cbEntry))
iList++;
{
if (pFree)
{
/*
* Found something. Should we we split it? We split from the end
* to avoid having to update all the sub entries.
*/
else
{
pFree += cRemainder;
}
break;
}
iList++;
}
if (!pFree)
{
/*
* Allocate a new block. (We could search the list below in some
* cases, but it's too much effort to write and execute).
*/
if (!pChunk)
return NULL;
/*
* Get one node for the allocation at hand.
*/
/*
* Create a free block out of the remainder (always a reminder).
*/
pNewFree->cbFree = cbChunk - sizeof(*pNewFree) - cbEntry; Assert(pNewFree->cbFree < cbChunk && pNewFree->cbFree > 0);
while (iInternalBlock-- > 1)
{
}
}
/*
* Initialize the entry. We zero all bytes we don't use so they cannot
* accidentally be mistaken for a free entry.
*/
RT_BZERO(&pEntry->szString[cchString], cbEntry - RT_UOFFSETOF(RTSTRCACHEENTRY, szString) - cchString);
return pEntry;
}
#endif /* RTSTRCACHE_WITH_MERGED_ALLOCATOR */
/**
* Allocate a cache entry from the heap.
*
* @returns Pointer to the cache entry on success, NULL on allocation error.
* @param pThis The string cache instance.
* @param uHash The full hash of the string.
* @param pchString The string.
* @param cchString The string length.
*/
{
/*
* Allocate a heap block for storing the string. We do some size aligning
* here to encourage the heap to give us optimal alignment.
*/
PRTSTRCACHEBIGENTRY pBigEntry = (PRTSTRCACHEBIGENTRY)RTMemAlloc(RT_ALIGN_Z(cbEntry, RTSTRCACHE_HEAP_ENTRY_SIZE_ALIGN));
if (!pBigEntry)
return NULL;
/*
* Initialize the block.
*/
}
/**
* Allocate a cache entry from a fixed size free list.
*
* @returns Pointer to the cache entry on success, NULL on allocation error.
* @param pThis The string cache instance.
* @param uHash The full hash of the string.
* @param pchString The string.
* @param cchString The string length.
* @param iFreeList Which free list.
*/
{
/*
* Get an entry from the free list. If empty, allocate another chunk of
* memory and split it up into free entries of the desired size.
*/
if (!pFree)
{
if (!pChunk)
return NULL;
while (cLeft-- > 0)
{
}
}
/*
* Unlink it.
*/
/*
* Initialize the entry.
*/
return pEntry;
}
/**
* Looks up a string in the hash table.
*
* @returns Pointer to the string cache entry, NULL + piFreeHashTabEntry if not
* found.
* @param pThis The string cache instance.
* @param uHashLen The hash + length (not RTSTRCACHEENTRY_BIG_LEN).
* @param cchString The real length.
* @param pchString The string.
* @param piFreeHashTabEntry Where to store the index insertion index if NULL
* is returned (same as what
* rtStrCacheFindEmptyHashTabEntry would return).
* @param pcCollisions Where to return a collision counter.
*/
static PRTSTRCACHEENTRY rtStrCacheLookUp(PRTSTRCACHEINT pThis, uint32_t uHashLen, uint32_t cchString, const char *pchString,
{
*pcCollisions = 0;
uint16_t cchStringFirst = RT_UOFFSETOF(RTSTRCACHEENTRY, szString[cchString + 1]) < RTSTRCACHE_HEAP_THRESHOLD
for (;;)
{
/* Give up if NULL, but record the index for insertion. */
{
if (*piFreeHashTabEntry == UINT32_MAX)
return NULL;
}
if (pEntry != PRTSTRCACHEENTRY_NIL)
{
/* Compare. */
{
{
return pEntry;
}
else
{
}
}
if (*piFreeHashTabEntry == UINT32_MAX)
*pcCollisions += 1;
}
/* Record the first NIL index for insertion in case we don't get a hit. */
else if (*piFreeHashTabEntry == UINT32_MAX)
/* Advance. */
}
}
RTDECL(const char *) RTStrCacheEnterN(RTSTRCACHE hStrCache, const char *pchString, size_t cchString)
{
/*
* Calculate the hash and figure the exact string length, then look for an existing entry.
*/
PRTSTRCACHEENTRY pEntry = rtStrCacheLookUp(pThis, uHashLen, cchString32, pchString, &iFreeHashTabEntry, &cCollisions);
if (pEntry)
{
}
else
{
/*
* Allocate a new entry.
*/
if (cbEntry >= RTSTRCACHE_HEAP_THRESHOLD)
else if (cbEntry >= RTSTRCACHE_MERGED_THRESHOLD_BIT)
#endif
else
if (!pEntry)
{
return NULL;
}
/*
* Insert it into the hash table.
*/
{
if (RT_SUCCESS(rc))
{
pThis->cHashInserts++;
return NULL;
}
}
pThis->cHashInserts++;
}
}
{
}
static const char *rtStrCacheEnterLowerWorker(PRTSTRCACHEINT pThis, const char *pchString, size_t cchString)
{
/*
* Try use a dynamic heap buffer first.
*/
if (cchString < 512)
{
if (pszStackBuf)
{
}
}
/*
* Fall back on heap.
*/
if (!pszHeapBuf)
return NULL;
return pszRet;
}
RTDECL(const char *) RTStrCacheEnterLowerN(RTSTRCACHE hStrCache, const char *pchString, size_t cchString)
{
}
{
}
{
return cRefs;
}
{
/* Remove it from the hash table. */
else
{
do
{
else
{
AssertFailed();
while (iHash-- > 0)
break;
}
}
/* Free it. */
{
if (cbMin <= RTSTRCACHE_MAX_FIXED)
#endif
{
/*
* No merging, just add it to the list.
*/
}
else
{
/*
* Complicated mode, we merge with adjecent nodes.
*/
/*
* Merge with previous?
* (Reading one block back is safe because there is always the
* RTSTRCACHECHUNK structure at the head of each memory chunk.)
*/
{
pMain--;
}
else
{
pFreeStr++;
}
/*
* Mark internal blocks in the string we're freeing.
*/
while (cInternalBlocks-- > 0)
{
pFreeStr++;
}
/*
* Merge with next? Limitation: We won't try cross page boundraries.
* (pFreeStr points to the next first free enter after the string now.)
*/
{
Assert(cInternalBlocks > 0);
/* Update the main block we merge with. */
/* Change the internal blocks we merged in. */
while (cInternalBlocks-- > 0)
{
pFreeStr++;
}
}
/*
*/
}
#endif /* RTSTRCACHE_WITH_MERGED_ALLOCATOR */
}
else
{
/* Big string. */
}
return 0;
}
{
if (!psz)
return 0;
/*
* Drop a reference and maybe free the entry.
*/
if (!cRefs)
return cRefs;
}
{
if (!psz)
return 0;
{
}
}
{
return true;
}
RTDECL(uint32_t) RTStrCacheGetStats(RTSTRCACHE hStrCache, size_t *pcbStrings, size_t *pcbChunks, size_t *pcbBigEntries,
{
if (pcbStrings)
if (pcbChunks)
if (pcbBigEntries)
if (pcHashCollisions)
if (pcHashCollisions2)
if (pcHashInserts)
if (pcRehashes)
return cStrings;
}