a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie/*
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * CDDL HEADER START
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie *
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * The contents of this file are subject to the terms of the
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * Common Development and Distribution License (the "License").
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * You may not use this file except in compliance with the License.
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie *
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * or http://www.opensolaris.org/os/licensing.
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * See the License for the specific language governing permissions
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * and limitations under the License.
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie *
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * When distributing Covered Code, include this CDDL HEADER in each
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * If applicable, add the following below this CDDL HEADER, with the
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * fields enclosed by brackets "[]" replaced with your own identifying
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * information: Portions Copyright [yyyy] [name of copyright owner]
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie *
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * CDDL HEADER END
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie/*
cce0e03bb2d07f0fe27cabb93acae9c23655859fab * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * Use is subject to license terms.
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie#ifndef __STRING_TABLE_DOT_H
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie#define __STRING_TABLE_DOT_H
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie#pragma ident "%Z%%M% %I% %E% SMI"
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie#include <sys/types.h>
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie#include <sys/avl.h>
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie#include <string_table.h>
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie#ifdef __cplusplus
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barieextern "C" {
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie#endif
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie/*
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * A string is represented in a string table using two values: length, and
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * value. Grouping all the strings of a given length together allows for
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * efficient matching of tail strings, as each input string value is hashed.
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * Each string table uses a 2-level AVL tree of AVL trees to represent this
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * organization.
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie *
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * The outer (main) AVL tree contains LenNode structures. The search key for
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * nodes on this main tree is the string length. Each such node represents
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * all strings of a given length, and all strings of that length are found
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * within.
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie *
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * The strings within each LenNode are maintained using a secondary AVL tree
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * of StrNode structures. The search key for this inner tree is the string
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * itself. The strings are maintained in lexical order.
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barietypedef struct {
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie avl_node_t sn_avlnode; /* AVL book-keeping */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie const char *sn_str; /* string */
cce0e03bb2d07f0fe27cabb93acae9c23655859fab size_t sn_refcnt; /* reference count */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie} StrNode;
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barietypedef struct {
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie avl_node_t ln_avlnode; /* AVL book-keeping */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie avl_tree_t *ln_strtree; /* AVL tree of associated strings */
cce0e03bb2d07f0fe27cabb93acae9c23655859fab size_t ln_strlen; /* length of associated strings */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie} LenNode;
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie/*
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * Define a master string data item. Other strings may be suffixes of this
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * string. The final string table will consist of the master string values,
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * laid end to end, with the other strings referencing their tails.
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barietypedef struct str_master Str_master;
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie
a194faf8907a6722dcf10ad16c6ca72c9b7bd0bariestruct str_master {
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie const char *sm_str; /* pointer to master string */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie Str_master *sm_next; /* used for tracking master strings */
cce0e03bb2d07f0fe27cabb93acae9c23655859fab size_t sm_strlen; /* length of master string */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie uint_t sm_hashval; /* hashval of master string */
cce0e03bb2d07f0fe27cabb93acae9c23655859fab size_t sm_stroff; /* offset into destination strtab */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie};
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie/*
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * Define a hash data item. This item represents an individual string that has
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * been input into the String hash table. The string may either be a suffix of
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * another string, or a master string.
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barietypedef struct str_hash Str_hash;
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie
a194faf8907a6722dcf10ad16c6ca72c9b7bd0bariestruct str_hash {
cce0e03bb2d07f0fe27cabb93acae9c23655859fab size_t hi_strlen; /* string length */
cce0e03bb2d07f0fe27cabb93acae9c23655859fab size_t hi_refcnt; /* number of references to str */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie uint_t hi_hashval; /* hash for string */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie Str_master *hi_mstr; /* pointer to master string */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie Str_hash *hi_next; /* next entry in hash bucket */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie};
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie/*
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * Controlling data structure for a String Table.
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0bariestruct str_tbl {
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie avl_tree_t *st_lentree; /* AVL tree of string lengths */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie char *st_strbuf; /* string buffer */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie Str_hash **st_hashbcks; /* hash buckets */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie Str_master *st_mstrlist; /* list of all master strings */
cce0e03bb2d07f0fe27cabb93acae9c23655859fab size_t st_fullstrsize; /* uncompressed table size */
cce0e03bb2d07f0fe27cabb93acae9c23655859fab size_t st_nextoff; /* next available string */
cce0e03bb2d07f0fe27cabb93acae9c23655859fab size_t st_strsize; /* compressed size */
cce0e03bb2d07f0fe27cabb93acae9c23655859fab size_t st_strcnt; /* number of strings */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie uint_t st_hbckcnt; /* number of buckets in */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie /* hashlist */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie uint_t st_flags;
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie};
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie#define FLG_STTAB_COMPRESS 0x01 /* compressed string table */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie#define FLG_STTAB_COOKED 0x02 /* offset has been assigned */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie/*
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie * Starting value for use with string hashing functions inside of string_table.c
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie */
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie#define HASHSEED 5381
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie#ifdef __cplusplus
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie}
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie#endif
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie
a194faf8907a6722dcf10ad16c6ca72c9b7bd0barie#endif /* __STRING_TABLE_DOT_H */