avl.c revision 208e825d0597a017edee1b095c64040043c0c673
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* AVL - generic AVL tree implementation for kernel use
*
* A complete description of AVL trees can be found in many CS textbooks.
*
* Here is a very brief overview. An AVL tree is a binary search tree that is
* almost perfectly balanced. By "almost" perfectly balanced, we mean that at
* any given node, the left and right subtrees are allowed to differ in height
* by at most 1 level.
*
* This relaxation from a perfectly balanced binary tree allows doing
* insertion and deletion relatively efficiently. Searching the tree is
* still a fast operation, roughly O(log(N)).
*
* The key to insertion and deletion is a set of tree manipulations called
* rotations, which bring unbalanced subtrees back into the semi-balanced state.
*
* This implementation of AVL trees has the following peculiarities:
*
* - The AVL specific data structures are physically embedded as fields
* in the "using" data structures. To maintain generality the code
* must constantly translate between "avl_node_t *" and containing
* data structure "void *"s by adding/subtracting the avl_offset.
*
* - Since the AVL data is always embedded in other structures, there is
* no locking or memory allocation in the AVL routines. This must be
* provided for by the enclosing data structure's semantics. Typically,
* avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
* exclusive write lock. Other operations require a read lock.
*
* - The implementation uses iteration instead of explicit recursion,
* since it is intended to run on limited size kernel stacks. Since
* there is no recursion stack present to move "up" in the tree,
* there is an explicit "parent" link in the avl_node_t.
*
* In the code, variables (instead of constants) are used to represent
* left and right indices. The implementation is written as if it only
* dealt with left handed manipulations. By changing the value assigned
* to "left", the code also works for right handed trees. The
* following variables/terms are frequently used:
*
* int left; // 0 when dealing with left children,
* // 1 for dealing with right children
*
* int left_heavy; // -1 when left subtree is taller at some node,
* // +1 when right subtree is taller
*
* int right; // will be the opposite of left (0 or 1)
* int right_heavy;// will be the opposite of left_heavy (-1 or 1)
*
* int direction; // 0 for "<" (ie. left child); 1 for ">" (right)
*
* Though it is a little more confusing to read the code, the approach
* allows using half as much code (and hence cache footprint) for tree
* manipulations and eliminates many conditional branches.
*
* - The avl_index_t is an opaque "cookie" used to find nodes at or
* adjacent to where a new value would be inserted in the tree. The value
* is a modified "avl_node_t *". The bottom bit (normally 0 for a
* pointer) is set to indicate whether the new node has a value greater
* than the value of the indicated "avl_node_t *".
*/
/*
* Small arrays to translate between balance (or diff) values and child indices.
*
* Code that deals with binary tree data structures will randomly use
* left and right children when examining a tree. C "if()" statements
* which evaluate randomly suffer from very poor hardware branch prediction.
* In this code we avoid some of the branch mispredictions by using the
* following translation arrays. They replace random branches with an
* additional memory reference. Since the translation arrays are both very
* small the data should remain efficiently in cache.
*/
/*
* Maps a balance value to a child index (0 = left, 1 = right).
* NOTE(review): presumably indexed by (balance + 1), i.e. entries are for
* balance -1, 0 and +1 in that order -- confirm against the callers.
*/
static const int avl_balance2child[] = {
	0,
	0,
	1
};
/*
* Walk from one node to the previous valued node (ie. an infix walk
* towards the left). At any given node we do one of 2 things:
*
* - If there is a left child, go to it, then to its rightmost descendant.
*
* - otherwise we return thru parent nodes until we've come from a right child.
*
* Return Value:
* NULL - if at the end of the nodes
* otherwise next node
*/
void *
{
int was_child;
/*
* nowhere to walk to if tree is empty
*/
return (NULL);
/*
* Visit the previous valued node. There are two possibilities:
*
* If this node has a left child, go down one left, then all
* the way right.
*/
;
/*
* Otherwise, return thru left children as far as we can.
*/
} else {
for (;;) {
return (NULL);
break;
}
}
}
/*
* Return the lowest valued node in a tree or NULL.
* (leftmost child from root of tree)
*/
void *
{
return (NULL);
}
/*
* Return the highest valued node in a tree or NULL.
* (rightmost child from root of tree)
*/
void *
{
return (NULL);
}
/*
* Access the node immediately before or after an insertion point.
*
* "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child
*
* Return value:
* NULL: no node in the given direction
* "void *" of the found tree node
*/
void *
{
void *data;
return (NULL);
}
return (data);
}
/*
* Search for the node which contains "value". The algorithm is a
* simple binary tree search.
*
* return value:
* NULL: the value is not in the AVL tree
* *where (if not NULL) is set to indicate the insertion point
* "void *" of the found tree node
*/
void *
{
int child = 0;
int diff;
if (diff == 0) {
#ifdef DEBUG
#endif
}
}
return (NULL);
}
/*
* Perform a rotation to restore balance at the subtree given by depth.
*
* This routine is used by both insertion and deletion. The return value
* indicates:
* 0 : subtree did not change height
* !0 : subtree was reduced in height
*
* The code is written as if handling left rotations; right rotations are
* symmetric and are handled by swapping the roles of left/right and
* left_heavy/right_heavy.
*
* On input balance is the "new" balance at "node". This value is either
* -2 or +2.
*/
static int
{
int right_heavy = -left_heavy;
/* BEGIN CSTYLED */
/*
* case 1 : node is overly left heavy, the left child is balanced or
* also left heavy. This requires the following rotation.
*
* (node bal:-2)
* / \
* / \
* (child bal:0 or -1)
* / \
* / \
* cright
*
* becomes:
*
* (child bal:1 or 0)
* / \
* / \
* (node bal:-1 or 0)
* / \
* / \
* cright
*
* we detect this situation by noting that child's balance is not
* right_heavy.
*/
/* END CSTYLED */
if (child_bal != right_heavy) {
/*
* compute new balance of nodes
*
* If child used to be left heavy (now balanced) we reduced
* the height of this sub-tree -- used in "return...;" below
*/
/*
* move "cright" to be node's left child
*/
}
/*
* move node to be child's right child
*/
/*
* update the pointer into this subtree
*/
else
return (child_bal == 0);
}
/* BEGIN CSTYLED */
/*
* case 2 : When node is left heavy, but child is right heavy we use
* a different rotation.
*
* (node b:-2)
* / \
* / \
* / \
* (child b:+1)
* / \
* / \
* (gchild b: != 0)
* / \
* / \
* gleft gright
*
* becomes:
*
* (gchild b:0)
* / \
* / \
* / \
* (child b:?) (node b:?)
* / \ / \
* / \ / \
* gleft gright
*
* computing the new balances is more complicated. As an example:
* if gchild was right_heavy, then child is now left heavy
* else it is balanced
*/
/* END CSTYLED */
/*
* move gright to left child of node and
*
* move gleft to right child of node
*/
}
}
/*
* move child to left child of gchild and
*
* move node to right child of gchild and
*
* fixup parent of all this to point to gchild
*/
AVL_SETBALANCE(gchild, 0);
else
return (1); /* the new tree is always shorter */
}
/*
* Insert a new node into an AVL tree at the specified (from avl_find()) place.
*
* Newly inserted nodes are always leaf nodes in the tree, since avl_find()
* searches out to the leaf positions. The avl_index_t indicates the node
* which will be the parent of the new node.
*
* After the node is inserted, a single rotation further up the tree may
* be necessary to maintain an acceptable AVL balance.
*/
void
{
int old_balance;
int new_balance;
#ifdef _LP64
#endif
/*
* First, add the node to the tree at the indicated position.
*/
++tree->avl_numnodes;
AVL_SETBALANCE(node, 0);
} else {
}
/*
* Now, back up the tree modifying the balance of all nodes above the
* insertion point. If we get to a highly unbalanced ancestor, we
* need to do a rotation. If we back out of the tree we are done.
* If we brought any subtree into perfect balance (0), we are also done.
*/
for (;;) {
return;
/*
* Compute the new balance
*/
/*
* If we introduced equal balance, then we are done immediately
*/
if (new_balance == 0) {
AVL_SETBALANCE(node, 0);
return;
}
/*
* If both old and new are not zero we went
* from -1 to -2 balance, do a rotation.
*/
if (old_balance != 0)
break;
}
/*
* perform a rotation to fix the tree and return
*/
}
/*
* Insert "new_data" in "tree" in the given "direction" either after or
* before (AVL_AFTER, AVL_BEFORE) the data "here".
*
* Insertions can only be done at empty leaf points in the tree, therefore
* if the given child of the node is already present we move to either
* the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
* every other node in the tree is a leaf, this always works.
*
* To help developers using this interface, we assert that the new node
* is correctly ordered at every step of the way in DEBUG kernels.
*/
void
void *new_data,
void *here,
int direction)
{
#ifdef DEBUG
int diff;
#endif
/*
* If corresponding child of node is not NULL, go to the neighboring
* node and reverse the insertion direction.
*/
#ifdef DEBUG
#endif
#ifdef DEBUG
#endif
}
#ifdef DEBUG
#endif
}
}
/*
* Add a new node to an AVL tree.
*/
void
{
/*
* This is unfortunate. We want to call panic() here, even for
* non-DEBUG kernels. In userland, however, we can't depend on anything
* in libc or else the rtld build process gets confused. So, all we can
* do in userland is resort to a normal ASSERT().
*/
#ifdef _KERNEL
panic("avl_find() succeeded inside avl_add()");
#else
ASSERT(0);
#endif
}
/*
* Delete a node from the AVL tree. Deletion is similar to insertion, but
* with 2 complications.
*
* First, we may be deleting an interior node. Consider the following subtree:
*
* d c c
* / \ / \ / \
* b e b e b e
* / \ / \ /
* a c a a
*
* When we are deleting node (d), we find and bring up an adjacent valued leaf
* node, say (c), to take the interior node's place. In the code this is
* handled by temporarily swapping (d) and (c) in the tree and then using
* common code to delete (d) from the leaf position.
*
* Secondly, an interior deletion from a deep tree may require more than one
* rotation to fix the balance. This is handled by moving up the tree through
* parents and applying rotations as needed. The return value from
* avl_rotation() is used to detect when a subtree did not change overall
* height due to a rotation.
*/
void
{
int old_balance;
int new_balance;
int left;
int right;
int which_child;
/*
* Deletion is easiest with a node that has at most 1 child.
* We swap a node with 2 children with a sequentially valued
* neighbor node. That node will have at most 1 child. Note this
* has no effect on the ordering of the remaining nodes.
*
* As an optimization, we choose the greater neighbor if the tree
* is right heavy, otherwise the left neighbor. This reduces the
* number of rotations needed.
*/
/*
* choose node to swap from whichever side is taller
*/
/*
* get to the previous valued node
* (down 1 left, as far as possible right)
*/
;
/*
* create a temp placeholder for 'node'
* move 'node' to delete's spot in the tree
*/
else
/*
* Put tmp where node used to be (just temporary).
* It always has a parent and at most 1 child.
*/
}
/*
* Here we know "delete" is at least partially a leaf node. It can
* be easily removed from the tree.
*/
--tree->avl_numnodes;
else
/*
* Connect parent directly to node (leaving out delete).
*/
}
return;
}
/*
* Since the subtree is now shorter, begin adjusting parent balances
* and performing any needed rotations.
*/
do {
/*
* Move up the tree and adjust the balance
*
* Capture the parent and which_child values for the next
* iteration before any rotations occur.
*/
/*
* If a node was in perfect balance but isn't anymore then
* we can stop, since the height didn't change above this point
* due to a deletion.
*/
if (old_balance == 0) {
break;
}
/*
* If the new balance is zero, we don't need to rotate
* else
* need a rotation to fix the balance.
* If the rotation doesn't change the height
* of the sub-tree we have finished adjusting.
*/
if (new_balance == 0)
break;
}
/*
* initialize a new AVL tree
*/
void
{
#ifdef _LP64
#endif
tree->avl_numnodes = 0;
}
/*
* Delete a tree.
*/
/* ARGSUSED */
void
{
}
/*
* Return the number of nodes in an AVL tree.
*/
{
return (tree->avl_numnodes);
}
/*
* Tag value used with the avl_destroy_nodes() cookie: a cookie equal to
* CHILDBIT on input means the tree is done.
* NOTE(review): presumably this is also the low-order bit that records
* which child was visited last -- confirm against the function body.
*/
#define CHILDBIT (1L)
/*
* Post-order tree walk used to visit all tree nodes and destroy the tree
* in post order. This is used for destroying a tree w/o paying any cost
* for rebalancing it.
*
* example:
*
* void *cookie = NULL;
* my_data_t *node;
*
* while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
* free(node);
* avl_destroy(tree);
*
* The cookie is really an avl_node_t to the current node's parent and
* an indication of which child you looked at last.
*
* On input, a cookie value of CHILDBIT indicates the tree is done.
*/
void *
{
int child;
void *first;
/*
* Initial calls go to the first node or its right descendant.
*/
/*
* deal with an empty tree
*/
return (NULL);
}
goto check_right_side;
}
/*
* If there is no parent to return to we are done.
*/
tree->avl_numnodes = 0;
}
return (NULL);
}
/*
* Remove the child pointer we just visited from the parent and tree.
*/
--tree->avl_numnodes;
/*
* If we just did a right child or there isn't one, go up to parent.
*/
goto done;
}
/*
* Do parent's right child, then leftmost descendant.
*/
}
/*
* If here, we moved to a left child. It may have one
* child on the right (when balance == +1).
*/
} else {
}
done:
} else {
}
}