lookup3.c revision 1b4bb4fdac4dce4e658aa3743153d77c04d1a331
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering/* Slightly modified by Lennart Poettering, to avoid name clashes, and
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering * unexport a few functions. */
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering#include "lookup3.h"
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering/*
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering-------------------------------------------------------------------------------
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poetteringlookup3.c, by Bob Jenkins, May 2006, Public Domain.
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart PoetteringThese are functions for producing 32-bit hashes for hash table lookup.
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poetteringhashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poetteringare externally useful functions. Routines to test the hash are included
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poetteringif SELF_TEST is defined. You can use this free for any purpose. It's in
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poetteringthe public domain. It has no warranty.
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart PoetteringYou probably want to use hashlittle(). hashlittle() and hashbig()
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poetteringhash byte arrays. hashlittle() is faster than hashbig() on
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poetteringlittle-endian machines. Intel and AMD are little-endian machines.
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart PoetteringOn second thought, you probably want hashlittle2(), which is identical to
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poetteringhashlittle() except it returns two 32-bit hashes for the price of one.
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart PoetteringYou could implement hashbig2() if you wanted but I haven't bothered here.
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart PoetteringIf you want to find a hash of, say, exactly 7 integers, do
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering a = i1; b = i2; c = i3;
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek mix(a,b,c);
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek a += i4; b += i5; c += i6;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering mix(a,b,c);
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering a += i7;
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering final(a,b,c);
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringthen use c as the hash value. If you have a variable length array of
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering4-byte integers to hash, use hashword(). If you have a byte array (like
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringa character string), use hashlittle(). If you have several byte arrays, or
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringa mix of things, see the comments above hashlittle().
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart PoetteringWhy is this so big? I read 12 bytes at a time into 3 4-byte integers,
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringthen mix those integers. This is fast (you can do a lot more thorough
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringmixing with 12*3 instructions on 3 integers than you can with 3 instructions
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringon 1 byte), but shoehorning those bytes into integers efficiently is messy.
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering-------------------------------------------------------------------------------
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering*/
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering/* #define SELF_TEST 1 */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering#include <stdio.h> /* defines printf for tests */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering#include <time.h> /* defines time_t for timings in the test */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering#include <stdint.h> /* defines uint32_t etc */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering#include <sys/param.h> /* attempt to define endianness */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering#ifdef linux
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering# include <endian.h> /* attempt to define endianness */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering#endif
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering/*
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering * My best guess at if you are big-endian or little-endian. This may
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering * need adjustment.
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering __BYTE_ORDER == __LITTLE_ENDIAN) || \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering (defined(i386) || defined(__i386__) || defined(__i486__) || \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering# define HASH_LITTLE_ENDIAN 1
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering# define HASH_BIG_ENDIAN 0
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering __BYTE_ORDER == __BIG_ENDIAN) || \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering# define HASH_LITTLE_ENDIAN 0
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering# define HASH_BIG_ENDIAN 1
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering#else
39883f622f392d8579f4428fc5a789a102efbb10Lennart Poettering# define HASH_LITTLE_ENDIAN 0
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering# define HASH_BIG_ENDIAN 0
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering#endif
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering#define hashsize(n) ((uint32_t)1<<(n))
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering#define hashmask(n) (hashsize(n)-1)
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering/*
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering-------------------------------------------------------------------------------
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringmix -- mix 3 32-bit values reversibly.
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart PoetteringThis is reversible, so any information in (a,b,c) before mix() is
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringstill in (a,b,c) after mix().
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart PoetteringIf four pairs of (a,b,c) inputs are run through mix(), or through
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringmix() in reverse, there are at least 32 bits of the output that
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringare sometimes the same for one pair and different for another pair.
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart PoetteringThis was tested for:
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering* pairs that differed by one bit, by two bits, in any combination
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering of top bits of (a,b,c), or in any combination of bottom bits of
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering (a,b,c).
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering is commonly produced by subtraction) look like a single 1-bit
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering difference.
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering* the base values were pseudorandom, all zero but one bit set, or
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering all zero plus a counter that starts at zero.
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart PoetteringSome k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringsatisfy this are
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering 4 6 8 16 19 4
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering 9 15 3 18 27 15
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering 14 9 3 7 17 3
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart PoetteringWell, "9 15 3 18 27 15" didn't quite get 32 bits diffing
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringfor "differ" defined as + with a one-bit base and a two-bit delta. I
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringused http://burtleburtle.net/bob/hash/avalanche.html to choose
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringthe operations, constants, and arrangements of the variables.
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart PoetteringThis does not achieve avalanche. There are input bits of (a,b,c)
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringthat fail to affect some output bits of (a,b,c), especially of a. The
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringmost thoroughly mixed value is c, but it doesn't really even achieve
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringavalanche in c.
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart PoetteringThis allows some parallelism. Read-after-writes are good at doubling
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringthe number of bits affected, so the goal of mixing pulls in the opposite
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringdirection as the goal of parallelism. I did what I could. Rotates
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringseem to cost as much as shifts on every machine I could lay my hands
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringon, and rotates are much kinder to the top and bottom bits, so I used
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringrotates.
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering-------------------------------------------------------------------------------
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering*/
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering#define mix(a,b,c) \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering{ \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering a -= c; a ^= rot(c, 4); c += b; \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering b -= a; b ^= rot(a, 6); a += c; \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering c -= b; c ^= rot(b, 8); b += a; \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering a -= c; a ^= rot(c,16); c += b; \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering b -= a; b ^= rot(a,19); a += c; \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering c -= b; c ^= rot(b, 4); b += a; \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering}
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering/*
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering-------------------------------------------------------------------------------
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringfinal -- final mixing of 3 32-bit values (a,b,c) into c
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart PoetteringPairs of (a,b,c) values differing in only a few bits will usually
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringproduce values of c that look totally different. This was tested for
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering* pairs that differed by one bit, by two bits, in any combination
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering of top bits of (a,b,c), or in any combination of bottom bits of
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering (a,b,c).
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering is commonly produced by subtraction) look like a single 1-bit
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering difference.
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering* the base values were pseudorandom, all zero but one bit set, or
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering all zero plus a counter that starts at zero.
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart PoetteringThese constants passed:
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering 14 11 25 16 4 14 24
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering 12 14 25 16 4 14 24
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringand these came close:
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering 4 8 15 26 3 22 24
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering 10 8 15 26 3 22 24
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering 11 8 15 26 3 22 24
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering-------------------------------------------------------------------------------
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering*/
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering#define final(a,b,c) \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering{ \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering c ^= b; c -= rot(b,14); \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering a ^= c; a -= rot(c,11); \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering b ^= a; b -= rot(a,25); \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering c ^= b; c -= rot(b,16); \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering a ^= c; a -= rot(c,4); \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering b ^= a; b -= rot(a,14); \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering c ^= b; c -= rot(b,24); \
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering}
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering/*
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering--------------------------------------------------------------------
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering This works on all machines. To be useful, it requires
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering -- that the key be an array of uint32_t's, and
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering -- that the length be the number of uint32_t's in the key
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering The function hashword() is identical to hashlittle() on little-endian
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering machines, and identical to hashbig() on big-endian machines,
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering except that the length has to be measured in uint32_ts rather than in
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering bytes. hashlittle() is more complicated than hashword() only because
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering hashlittle() has to dance around fitting the key bytes into registers.
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering--------------------------------------------------------------------
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering*/
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringuint32_t jenkins_hashword(
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringconst uint32_t *k, /* the key, an array of uint32_t values */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringsize_t length, /* the length of the key, in uint32_ts */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringuint32_t initval) /* the previous hash, or an arbitrary value */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering{
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering uint32_t a,b,c;
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering /* Set up the internal state */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering /*------------------------------------------------- handle most of the key */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering while (length > 3)
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering {
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering a += k[0];
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering b += k[1];
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering c += k[2];
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering mix(a,b,c);
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering length -= 3;
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek k += 3;
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek }
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering /*------------------------------------------- handle the last 3 uint32_t's */
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering switch(length) /* all the case statements fall through */
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering {
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering case 3 : c+=k[2];
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering case 2 : b+=k[1];
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering case 1 : a+=k[0];
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering final(a,b,c);
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering case 0: /* case 0: nothing left to add */
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering break;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering }
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering /*------------------------------------------------------ report the result */
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering return c;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering}
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering
ef5bfcf668e6029faa78534dfeb2591df854cdefLennart Poettering
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering/*
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering--------------------------------------------------------------------
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmekhashword2() -- same as hashword(), but take two seeds and return two
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering32-bit values. pc and pb must both be nonnull, and *pc and *pb must
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poetteringboth be initialized with seeds. If you pass in (*pb)==0, the output
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering(*pc) will be the same as the return value from hashword().
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering--------------------------------------------------------------------
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek*/
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmekvoid jenkins_hashword2 (
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poetteringconst uint32_t *k, /* the key, an array of uint32_t values */
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poetteringsize_t length, /* the length of the key, in uint32_ts */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringuint32_t *pc, /* IN: seed OUT: primary hash value */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringuint32_t *pb) /* IN: more seed OUT: secondary hash value */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering{
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering uint32_t a,b,c;
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering /* Set up the internal state */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering c += *pb;
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering /*------------------------------------------------- handle most of the key */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering while (length > 3)
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering {
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering a += k[0];
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering b += k[1];
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering c += k[2];
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering mix(a,b,c);
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering length -= 3;
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering k += 3;
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering }
875c6e1b48f37a07dfbb80d6653c73f205e94260Lennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering /*------------------------------------------- handle the last 3 uint32_t's */
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering switch(length) /* all the case statements fall through */
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek {
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering case 3 : c+=k[2];
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek case 2 : b+=k[1];
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering case 1 : a+=k[0];
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering final(a,b,c);
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek case 0: /* case 0: nothing left to add */
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering break;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering }
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering /*------------------------------------------------------ report the result */
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering *pc=c; *pb=b;
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering}
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering/*
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering-------------------------------------------------------------------------------
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poetteringhashlittle() -- hash a variable-length key into a 32-bit value
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering k : the key (the unaligned variable-length array of bytes)
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering length : the length of the key, counting by bytes
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering initval : can be any 4-byte value
be3f52f4ed02a9256b1577719677b32a17b525acLennart PoetteringReturns a 32-bit value. Every bit of the key affects every bit of
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poetteringthe return value. Two keys differing by one or two bits will have
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poetteringtotally different hash values.
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering
be3f52f4ed02a9256b1577719677b32a17b525acLennart PoetteringThe best hash table sizes are powers of 2. There is no need to do
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poetteringmod a prime (mod is sooo slow!). If you need less than 32 bits,
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poetteringuse a bitmask. For example, if you need only 10 bits, do
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering h = (h & hashmask(10));
be3f52f4ed02a9256b1577719677b32a17b525acLennart PoetteringIn which case, the hash table should have hashsize(10) elements.
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering
be3f52f4ed02a9256b1577719677b32a17b525acLennart PoetteringIf you are hashing n strings (uint8_t **)k, do it like this:
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
be3f52f4ed02a9256b1577719677b32a17b525acLennart Poettering
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart PoetteringBy Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmekcode any way you wish, private, educational, or commercial. It's free.
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart PoetteringUse for hash table lookup, or anything where one collision in 2^^32 is
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poetteringacceptable. Do NOT use for cryptographic purposes.
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering-------------------------------------------------------------------------------
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering*/
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poetteringuint32_t jenkins_hashlittle( const void *key, size_t length, uint32_t initval)
73e231abde39f22097df50542c745e01de879836Jan Engelhardt{
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering uint32_t a,b,c; /* internal state */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering /* Set up the internal state */
74df0fca09b3c31ed19e14ba80f996fdff772417Lennart Poettering a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering u.ptr = key;
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering while (length > 12)
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering {
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek a += k[0];
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek b += k[1];
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering c += k[2];
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering mix(a,b,c);
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek length -= 12;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering k += 3;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering }
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering /*----------------------------- handle the last (probably partial) block */
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering /*
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek * "k[2]&0xffffff" actually reads beyond the end of the string, but
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering * then masks off the part it's not allowed to read. Because the
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering * string is aligned, the masked-off tail is in the same word as the
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering * rest of the string. Every machine with memory protection I've seen
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering * does it on word boundaries, so is OK with this. But VALGRIND will
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering * still catch it and complain. The masking trick does make the hash
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering * noticeably faster for short strings (like English words).
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering */
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering#ifndef VALGRIND
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering
0f4ba83c397e807939a4eb0b2cbd04ad4ab548ccLennart Poettering switch(length)
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering {
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering case 8 : b+=k[1]; a+=k[0]; break;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering case 6 : b+=k[1]&0xffff; a+=k[0]; break;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering case 5 : b+=k[1]&0xff; a+=k[0]; break;
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek case 4 : a+=k[0]; break;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering case 3 : a+=k[0]&0xffffff; break;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering case 2 : a+=k[0]&0xffff; break;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering case 1 : a+=k[0]&0xff; break;
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering case 0 : return c; /* zero length strings require no mixing */
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek }
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering#else /* make valgrind happy */
1ca208fb4f93e5869704af1812cbff7130a2fc03Zbigniew Jędrzejewski-Szmek {
3731acf1acfb4a6eb68374a5b137f3b368f63381Lennart Poettering const uint8_t *k8 = (const uint8_t *) k;
switch(length)
{
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
case 9 : c+=k8[8]; /* fall through */
case 8 : b+=k[1]; a+=k[0]; break;
case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
case 5 : b+=k8[4]; /* fall through */
case 4 : a+=k[0]; break;
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
case 1 : a+=k8[0]; break;
case 0 : return c;
}
}
#endif /* !valgrind */
} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
const uint8_t *k8;
/*--------------- all but last block: aligned reads and different mixing */
while (length > 12)
{
a += k[0] + (((uint32_t)k[1])<<16);
b += k[2] + (((uint32_t)k[3])<<16);
c += k[4] + (((uint32_t)k[5])<<16);
mix(a,b,c);
length -= 12;
k += 6;
}
/*----------------------------- handle the last (probably partial) block */
k8 = (const uint8_t *)k;
switch(length)
{
case 12: c+=k[4]+(((uint32_t)k[5])<<16);
b+=k[2]+(((uint32_t)k[3])<<16);
a+=k[0]+(((uint32_t)k[1])<<16);
break;
case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
case 10: c+=k[4];
b+=k[2]+(((uint32_t)k[3])<<16);
a+=k[0]+(((uint32_t)k[1])<<16);
break;
case 9 : c+=k8[8]; /* fall through */
case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
a+=k[0]+(((uint32_t)k[1])<<16);
break;
case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
case 6 : b+=k[2];
a+=k[0]+(((uint32_t)k[1])<<16);
break;
case 5 : b+=k8[4]; /* fall through */
case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
break;
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
case 2 : a+=k[0];
break;
case 1 : a+=k8[0];
break;
case 0 : return c; /* zero length requires no mixing */
}
} else { /* need to read the key one byte at a time */
const uint8_t *k = (const uint8_t *)key;
/*--------------- all but the last block: affect some 32 bits of (a,b,c) */
while (length > 12)
{
a += k[0];
a += ((uint32_t)k[1])<<8;
a += ((uint32_t)k[2])<<16;
a += ((uint32_t)k[3])<<24;
b += k[4];
b += ((uint32_t)k[5])<<8;
b += ((uint32_t)k[6])<<16;
b += ((uint32_t)k[7])<<24;
c += k[8];
c += ((uint32_t)k[9])<<8;
c += ((uint32_t)k[10])<<16;
c += ((uint32_t)k[11])<<24;
mix(a,b,c);
length -= 12;
k += 12;
}
/*-------------------------------- last block: affect all 32 bits of (c) */
switch(length) /* all the case statements fall through */
{
case 12: c+=((uint32_t)k[11])<<24;
case 11: c+=((uint32_t)k[10])<<16;
case 10: c+=((uint32_t)k[9])<<8;
case 9 : c+=k[8];
case 8 : b+=((uint32_t)k[7])<<24;
case 7 : b+=((uint32_t)k[6])<<16;
case 6 : b+=((uint32_t)k[5])<<8;
case 5 : b+=k[4];
case 4 : a+=((uint32_t)k[3])<<24;
case 3 : a+=((uint32_t)k[2])<<16;
case 2 : a+=((uint32_t)k[1])<<8;
case 1 : a+=k[0];
break;
case 0 : return c;
}
}
final(a,b,c);
return c;
}
/*
* hashlittle2: return 2 32-bit hash values
*
* This is identical to hashlittle(), except it returns two 32-bit hash
* values instead of just one. This is good enough for hash table
* lookup with 2^^64 buckets, or if you want a second hash if you're not
* happy with the first, or if you want a probably-unique 64-bit ID for
* the key. *pc is better mixed than *pb, so use *pc first. If you want
* a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
*/
void jenkins_hashlittle2(
const void *key, /* the key to hash */
size_t length, /* length of the key */
uint32_t *pc, /* IN: primary initval, OUT: primary hash */
uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
{
uint32_t a,b,c; /* internal state */
union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
/* Set up the internal state */
a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
c += *pb;
u.ptr = key;
if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
while (length > 12)
{
a += k[0];
b += k[1];
c += k[2];
mix(a,b,c);
length -= 12;
k += 3;
}
/*----------------------------- handle the last (probably partial) block */
/*
* "k[2]&0xffffff" actually reads beyond the end of the string, but
* then masks off the part it's not allowed to read. Because the
* string is aligned, the masked-off tail is in the same word as the
* rest of the string. Every machine with memory protection I've seen
* does it on word boundaries, so is OK with this. But VALGRIND will
* still catch it and complain. The masking trick does make the hash
* noticeably faster for short strings (like English words).
*/
#ifndef VALGRIND
switch(length)
{
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
case 8 : b+=k[1]; a+=k[0]; break;
case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
case 6 : b+=k[1]&0xffff; a+=k[0]; break;
case 5 : b+=k[1]&0xff; a+=k[0]; break;
case 4 : a+=k[0]; break;
case 3 : a+=k[0]&0xffffff; break;
case 2 : a+=k[0]&0xffff; break;
case 1 : a+=k[0]&0xff; break;
case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
}
#else /* make valgrind happy */
{
const uint8_t *k8 = (const uint8_t *)k;
switch(length)
{
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
case 9 : c+=k8[8]; /* fall through */
case 8 : b+=k[1]; a+=k[0]; break;
case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
case 5 : b+=k8[4]; /* fall through */
case 4 : a+=k[0]; break;
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
case 1 : a+=k8[0]; break;
case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
}
}
#endif /* !valgrind */
} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
const uint8_t *k8;
/*--------------- all but last block: aligned reads and different mixing */
while (length > 12)
{
a += k[0] + (((uint32_t)k[1])<<16);
b += k[2] + (((uint32_t)k[3])<<16);
c += k[4] + (((uint32_t)k[5])<<16);
mix(a,b,c);
length -= 12;
k += 6;
}
/*----------------------------- handle the last (probably partial) block */
k8 = (const uint8_t *)k;
switch(length)
{
case 12: c+=k[4]+(((uint32_t)k[5])<<16);
b+=k[2]+(((uint32_t)k[3])<<16);
a+=k[0]+(((uint32_t)k[1])<<16);
break;
case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
case 10: c+=k[4];
b+=k[2]+(((uint32_t)k[3])<<16);
a+=k[0]+(((uint32_t)k[1])<<16);
break;
case 9 : c+=k8[8]; /* fall through */
case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
a+=k[0]+(((uint32_t)k[1])<<16);
break;
case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
case 6 : b+=k[2];
a+=k[0]+(((uint32_t)k[1])<<16);
break;
case 5 : b+=k8[4]; /* fall through */
case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
break;
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
case 2 : a+=k[0];
break;
case 1 : a+=k8[0];
break;
case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
}
} else { /* need to read the key one byte at a time */
const uint8_t *k = (const uint8_t *)key;
/*--------------- all but the last block: affect some 32 bits of (a,b,c) */
while (length > 12)
{
a += k[0];
a += ((uint32_t)k[1])<<8;
a += ((uint32_t)k[2])<<16;
a += ((uint32_t)k[3])<<24;
b += k[4];
b += ((uint32_t)k[5])<<8;
b += ((uint32_t)k[6])<<16;
b += ((uint32_t)k[7])<<24;
c += k[8];
c += ((uint32_t)k[9])<<8;
c += ((uint32_t)k[10])<<16;
c += ((uint32_t)k[11])<<24;
mix(a,b,c);
length -= 12;
k += 12;
}
/*-------------------------------- last block: affect all 32 bits of (c) */
switch(length) /* all the case statements fall through */
{
case 12: c+=((uint32_t)k[11])<<24;
case 11: c+=((uint32_t)k[10])<<16;
case 10: c+=((uint32_t)k[9])<<8;
case 9 : c+=k[8];
case 8 : b+=((uint32_t)k[7])<<24;
case 7 : b+=((uint32_t)k[6])<<16;
case 6 : b+=((uint32_t)k[5])<<8;
case 5 : b+=k[4];
case 4 : a+=((uint32_t)k[3])<<24;
case 3 : a+=((uint32_t)k[2])<<16;
case 2 : a+=((uint32_t)k[1])<<8;
case 1 : a+=k[0];
break;
case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
}
}
final(a,b,c);
*pc=c; *pb=b;
}
/*
* hashbig():
* This is the same as hashword() on big-endian machines. It is different
* from hashlittle() on all machines. hashbig() takes advantage of
* big-endian byte ordering.
*/
uint32_t jenkins_hashbig( const void *key, size_t length, uint32_t initval)
{
uint32_t a,b,c;
union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
/* Set up the internal state */
a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
u.ptr = key;
if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
while (length > 12)
{
a += k[0];
b += k[1];
c += k[2];
mix(a,b,c);
length -= 12;
k += 3;
}
/*----------------------------- handle the last (probably partial) block */
/*
* "k[2]<<8" actually reads beyond the end of the string, but
* then shifts out the part it's not allowed to read. Because the
* string is aligned, the illegal read is in the same word as the
* rest of the string. Every machine with memory protection I've seen
* does it on word boundaries, so is OK with this. But VALGRIND will
* still catch it and complain. The masking trick does make the hash
* noticeably faster for short strings (like English words).
*/
#ifndef VALGRIND
switch(length)
{
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
case 8 : b+=k[1]; a+=k[0]; break;
case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
case 4 : a+=k[0]; break;
case 3 : a+=k[0]&0xffffff00; break;
case 2 : a+=k[0]&0xffff0000; break;
case 1 : a+=k[0]&0xff000000; break;
case 0 : return c; /* zero length strings require no mixing */
}
#else /* make valgrind happy */
{
const uint8_t *k8 = (const uint8_t *)k;
switch(length) /* all the case statements fall through */
{
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
case 8 : b+=k[1]; a+=k[0]; break;
case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
case 4 : a+=k[0]; break;
case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
case 1 : a+=((uint32_t)k8[0])<<24; break;
case 0 : return c;
}
}
#endif /* !VALGRIND */
} else { /* need to read the key one byte at a time */
const uint8_t *k = (const uint8_t *)key;
/*--------------- all but the last block: affect some 32 bits of (a,b,c) */
while (length > 12)
{
a += ((uint32_t)k[0])<<24;
a += ((uint32_t)k[1])<<16;
a += ((uint32_t)k[2])<<8;
a += ((uint32_t)k[3]);
b += ((uint32_t)k[4])<<24;
b += ((uint32_t)k[5])<<16;
b += ((uint32_t)k[6])<<8;
b += ((uint32_t)k[7]);
c += ((uint32_t)k[8])<<24;
c += ((uint32_t)k[9])<<16;
c += ((uint32_t)k[10])<<8;
c += ((uint32_t)k[11]);
mix(a,b,c);
length -= 12;
k += 12;
}
/*-------------------------------- last block: affect all 32 bits of (c) */
switch(length) /* all the case statements fall through */
{
case 12: c+=k[11];
case 11: c+=((uint32_t)k[10])<<8;
case 10: c+=((uint32_t)k[9])<<16;
case 9 : c+=((uint32_t)k[8])<<24;
case 8 : b+=k[7];
case 7 : b+=((uint32_t)k[6])<<8;
case 6 : b+=((uint32_t)k[5])<<16;
case 5 : b+=((uint32_t)k[4])<<24;
case 4 : a+=k[3];
case 3 : a+=((uint32_t)k[2])<<8;
case 2 : a+=((uint32_t)k[1])<<16;
case 1 : a+=((uint32_t)k[0])<<24;
break;
case 0 : return c;
}
}
final(a,b,c);
return c;
}
#ifdef SELF_TEST
/* used for timings */
void driver1()
{
uint8_t buf[256];
uint32_t i;
uint32_t h=0;
time_t a,z;
time(&a);
for (i=0; i<256; ++i) buf[i] = 'x';
for (i=0; i<1; ++i)
{
h = hashlittle(&buf[0],1,h);
}
time(&z);
if (z-a > 0) printf("time %d %.8x\n", z-a, h);
}
/* check that every input bit changes every output bit half the time */
#define HASHSTATE 1
#define HASHLEN 1
#define MAXPAIR 60
#define MAXLEN 70
void driver2()
{
uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
uint32_t x[HASHSTATE],y[HASHSTATE];
uint32_t hlen;
printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
for (hlen=0; hlen < MAXLEN; ++hlen)
{
z=0;
for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */
{
for (j=0; j<8; ++j) /*------------------------ for each input bit, */
{
for (m=1; m<8; ++m) /*------------ for serveral possible initvals, */
{
for (l=0; l<HASHSTATE; ++l)
e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
/*---- check that every output bit is affected by that input bit */
for (k=0; k<MAXPAIR; k+=2)
{
uint32_t finished=1;
/* keys have one bit different */
for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
/* have a and b be two keys differing in only one bit */
a[i] ^= (k<<j);
a[i] ^= (k>>(8-j));
c[0] = hashlittle(a, hlen, m);
b[i] ^= ((k+1)<<j);
b[i] ^= ((k+1)>>(8-j));
d[0] = hashlittle(b, hlen, m);
/* check every bit is 1, 0, set, and not set at least once */
for (l=0; l<HASHSTATE; ++l)
{
e[l] &= (c[l]^d[l]);
f[l] &= ~(c[l]^d[l]);
g[l] &= c[l];
h[l] &= ~c[l];
x[l] &= d[l];
y[l] &= ~d[l];
if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
}
if (finished) break;
}
if (k>z) z=k;
if (k==MAXPAIR)
{
printf("Some bit didn't change: ");
printf("%.8x %.8x %.8x %.8x %.8x %.8x ",
e[0],f[0],g[0],h[0],x[0],y[0]);
printf("i %d j %d m %d len %d\n", i, j, m, hlen);
}
if (z==MAXPAIR) goto done;
}
}
}
done:
if (z < MAXPAIR)
{
printf("Mix success %2d bytes %2d initvals ",i,m);
printf("required %d trials\n", z/2);
}
}
printf("\n");
}
/* Check for reading beyond the end of the buffer and alignment problems */
void driver3()
{
uint8_t buf[MAXLEN+20], *b;
uint32_t len;
uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
uint32_t h;
uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
uint32_t i;
uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
uint32_t j;
uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
uint32_t ref,x,y;
uint8_t *p;
printf("Endianness. These lines should all be the same (for values filled in):\n");
printf("%.8x %.8x %.8x\n",
hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
p = q;
printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
p = &qq[1];
printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
p = &qqq[2];
printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
p = &qqqq[3];
printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
printf("\n");
/* check that hashlittle2 and hashlittle produce the same results */
i=47; j=0;
hashlittle2(q, sizeof(q), &i, &j);
if (hashlittle(q, sizeof(q), 47) != i)
printf("hashlittle2 and hashlittle mismatch\n");
/* check that hashword2 and hashword produce the same results */
len = 0xdeadbeef;
i=47, j=0;
hashword2(&len, 1, &i, &j);
if (hashword(&len, 1, 47) != i)
printf("hashword2 and hashword mismatch %x %x\n",
i, hashword(&len, 1, 47));
/* check hashlittle doesn't read before or after the ends of the string */
for (h=0, b=buf+1; h<8; ++h, ++b)
{
for (i=0; i<MAXLEN; ++i)
{
len = i;
for (j=0; j<i; ++j) *(b+j)=0;
/* these should all be equal */
ref = hashlittle(b, len, (uint32_t)1);
*(b+i)=(uint8_t)~0;
*(b-1)=(uint8_t)~0;
x = hashlittle(b, len, (uint32_t)1);
y = hashlittle(b, len, (uint32_t)1);
if ((ref != x) || (ref != y))
{
printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
h, i);
}
}
}
}
/* check for problems with nulls */
void driver4()
{
uint8_t buf[1];
uint32_t h,i,state[HASHSTATE];
buf[0] = ~0;
for (i=0; i<HASHSTATE; ++i) state[i] = 1;
printf("These should all be different\n");
for (i=0, h=0; i<8; ++i)
{
h = hashlittle(buf, 0, h);
printf("%2ld 0-byte strings, hash is %.8x\n", i, h);
}
}
void driver5()
{
uint32_t b,c;
b=0, c=0, hashlittle2("", 0, &c, &b);
printf("hash is %.8lx %.8lx\n", c, b); /* deadbeef deadbeef */
b=0xdeadbeef, c=0, hashlittle2("", 0, &c, &b);
printf("hash is %.8lx %.8lx\n", c, b); /* bd5b7dde deadbeef */
b=0xdeadbeef, c=0xdeadbeef, hashlittle2("", 0, &c, &b);
printf("hash is %.8lx %.8lx\n", c, b); /* 9c093ccd bd5b7dde */
b=0, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
printf("hash is %.8lx %.8lx\n", c, b); /* 17770551 ce7226e6 */
b=1, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
printf("hash is %.8lx %.8lx\n", c, b); /* e3607cae bd371de4 */
b=0, c=1, hashlittle2("Four score and seven years ago", 30, &c, &b);
printf("hash is %.8lx %.8lx\n", c, b); /* cd628161 6cbea4b3 */
c = hashlittle("Four score and seven years ago", 30, 0);
printf("hash is %.8lx\n", c); /* 17770551 */
c = hashlittle("Four score and seven years ago", 30, 1);
printf("hash is %.8lx\n", c); /* cd628161 */
}
int main()
{
driver1(); /* test that the key is hashed: used for timings */
driver2(); /* test that whole key is hashed thoroughly */
driver3(); /* test that nothing but the key is hashed */
driver4(); /* test hashing multiple buffers (all buffers are null) */
driver5(); /* test the hash against known vectors */
return 1;
}
#endif /* SELF_TEST */