string_table.c revision 5aefb6555731130ca4fd295960123d71f2d21fe8
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <string_table.h>
#include <strings.h>
#include <sgs.h>
#include <stdio.h>
/*
* This file provides the interfaces to build a Str_tbl suitable
* for use by either the sgsmsg system or a standard ELF
* SHT_STRTAB.
*
* There are two modes which can be used when constructing a
* string table:
*
* st_new(0)
* standard string table - no compression. This is the
* traditional method and fast
*
* st_new(FLG_STNEW_COMPRESS)
* build a compressed string table which both
* eliminates duplicate strings and permits
* strings with common suffixes (atexit vs. exit) to
* overlap in the table. This provides space
* savings for many string tables.
*
* These string tables are now built with a common interface in a
* two-pass manner, the first pass it to find all of the strings
* required for the string-table and to calculate the size that
* will be required for the final string table.
*
* The second pass allocates the string table and populates the
* strings into the table and returns the offsets the strings
* have been assigned.
*
* The calling sequence to build and populate a string table is:
*
* st_new(); // initialize strtab
*
* st_insert(st1); // first pass of strings ...
* // calculates size required for
* // string table
*
* st_delstring(st?); // remove string previously
* // inserted
*
* st_insert(stN);
*
* st_getstrtab_sz(); // freezes strtab and computes
* // size of table.
*
* st_setstrbuf(); // associates a final destination
* // for the string table
*
* st_setstring(st1); // populate the string table
* ... // offsets are based off of second
* // pass through the string table
* st_setstring(stN);
*
* st_destroy(); // tear down string table
* // structures.
*
* String Suffix Compression Algorithm:
*
* Here's a quick high level overview of the Suffix String
* compression algorithm used. First - the heart of the algorithm
* is a Hash table list which represents a dictionary of all unique
* strings inserted into the string table. The hash function for
* this table is a standard string hash except that the hash starts
* at the last character in the string (&str[n - 1]) and works towards
* the first character in the function (&str[0]). As we compute the
* HASH value for a given string, we also compute the hash values
* for all of the possible suffix strings for that string.
*
* As we compute the hash - at each character see if the current
* suffix string for that hash is already present in the table. If
* it is, and the string is a master string. Then change that
* string to a suffix string of the new string being inserted.
*
* When the final hash value is found (hash for str[0...n]), check
* to see if it is in the hash table - if so increment the reference
* count for the string. If it is not yet in the table, insert a
* new hash table entry for a master string.
*
* The above method will find all suffixes of a given string given
* that the strings are inserted from shortest to longest. That is
* why this is a two phase method, we first collect all of the
* strings and store them based off of their length in a nice AVL tree.
* Once all of the strings have been submitted we then start the
* hash table build by traversing the AVL tree in order and
* inserting the strings from shortest to longest as described
* above.
*
*/
/* LINTLIBRARY */
int
{
return (0);
return (1);
return (-1);
}
/*
* Return a initialized Str_tbl - returns NULL on failure.
*
* stflags:
*
* FLG_STNEW_COMPRESS - build a compressed string table
*
*/
Str_tbl *
{
return (0);
/*
* Start with a leading '\0' - it's tradition.
*/
/*
* Do we compress this string table
*/
if ((stflags & FLG_STNEW_COMPRESS) == 0)
return (stp);
return (0);
}
return (stp);
}
/*
* Tear down a String_Table structure.
*/
void
{
uint_t i;
/*
* cleanup the master strings
*/
if (pmstr)
}
if (pmstr)
if (stp->st_hashbcks) {
for (i = 0; i < stp->st_hbckcnt; i++) {
if (psthash)
}
if (psthash)
}
}
}
/*
* Remove a previously inserted string from the Str_tbl
*/
int
{
/*
* String table can't have been cooked
*/
return (0);
/*
* no strings of this length recorded, let alone
* this specific string - someone goofed.
*/
return (-1);
}
pstlist = 0;
break;
}
if (stlist == 0) {
/*
* string was not found
*/
return (-1);
}
if (pstlist == 0) {
/*
* String is first on list.
*/
} else {
/*
* remove string from list.
*/
}
return (0);
}
/*
* Insert a new string into the Str_tbl
*/
int
{
/*
* String table can't have been cooked
*/
/*
* Null strings always point to the head of the string
* table - no reason to keep searching.
*/
if (stlen == 0)
return (0);
stp->st_stringcnt++;
return (0);
return (-1);
}
return (-1);
return (0);
}
/*
* For a given string - copy it into the buffer associated with
* the string table - and return the offset it has been assigned.
*
* If a value of '-1' is returned - the string was not found in
* the Str_tbl.
*/
int
{
int i;
/*
* String table *must* have been previously cooked
*/
/*
* Null string always points to head of string table
*/
if (stlen == 0) {
*stoff = 0;
return (0);
}
stlen++; /* count for trailing '\0' */
/*
* Have we overflowed our assigned buffer?
*/
return (-1);
return (0);
}
/*
* Calculate reverse hash for string
*/
for (i = stlen; i >= 0; i--) {
str[i]; /* h = ((h * 33) + c) */
}
const char *hstr;
break;
}
}
}
/*
* Did we find the string?
*/
if (sthash == 0)
return (-1);
/*
* Has this string been copied into the string table?
*/
/*
* Have we overflowed our assigned buffer?
*/
return (-1);
mstlen);
}
/*
* Calculate offset of (sub)string
*/
return (0);
}
static int
{
int i;
Str_master *mstr = 0;
/*
* We use a classic 'Bernstein k=33' hash function. But
* instead of hashing from the start of the string to the
* end, we do it in reverse.
*
* This way - we are essentially building all of the
* suffix hashvalues as we go. We can check to see if
* any suffixes already exist in the tree as we generate
* the hash.
*/
for (i = stlen; i >= 0; i--) {
str[i]; /* h = ((h * 33) + c) */
const char *hstr;
if (i == 0) {
/*
* Entry already in table,
* increment refcnt and get
* out.
*/
return (0);
} else {
/*
* If this 'suffix' is
* presently a 'master' string,
* then take over it's record.
*/
/*
* we should only do
* this once.
*/
}
}
}
}
}
}
/*
* Do we need a new master string, or can we take over
* one we already found in the table?
*/
if (mstr == 0) {
/*
* allocate a new master string
*/
return (-1);
} else {
/*
* We are taking over a existing master string,
* the stringsize only increments by the
* difference between the currnet string and the
* previous master.
*/
}
return (-1);
/*
* Insert string element into head of hash list
*/
return (0);
}
/*
* Return amount of space required for the string table.
*/
{
return (stp->st_fullstringsize);
}
void *cookie;
/*
* allocate a hash table about the size of # of
* strings input.
*/
if ((stp->st_hashbcks =
return (0);
/*
* We now walk all of the strings in the list,
* from shortest to longest, and insert them into
* the hashtable.
*/
/*
* Is it possible we have a empty string table,
* if so - the table still conains '\0'
* so still return the size.
*/
return (stp->st_stringsize);
}
return (0);
}
while (stelem) {
/*
* Walk the string lists and insert them
* into the hash list. Once a string is
* inserted we no longer need it's entry,
* so free it
*/
return (0);
if (pstrlist)
}
stelem->se_strlist = 0;
}
/*
* Now that all of the strings have been freed,
* go ahead and quickly re-walk the AVL tree and
* free all of the AVL nodes.
*
* avl_destroy_nodes() beats avl_remove() because
* avl_remove will 'ballance' the tree as nodes
* are deleted - we just want to tear the whole
* thing down now.
*/
stp->st_strtree = 0;
}
return (stp->st_stringsize);
}
/*
* Associate a buffer with the string table.
*/
const char *
{
}
int
{
return (-1);
} else {
return (-1);
}
#ifdef DEBUG
/*
* for debug builds - start with a stringtable filled in
* with '0xff'. This makes it very easy to find wholes
* which we failed to fill in - in the strtab.
*/
stbuf[0] = '\0';
#else
#endif
return (0);
}