[lttng-ust.git] / liblttng-ust / jhash.h

/*
 * Copyright (C) 2011 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
 */

#include <stdint.h>
#include <urcu/compiler.h>
#include <lttng/ust-endian.h>

/*
 * Hash function
 * Source: http://burtleburtle.net/bob/c/lookup3.c
 * Originally Public Domain
 */

/*
 * rot(x, k) -- rotate the 32-bit value x left by k bits.
 *
 * x is converted to uint32_t before shifting, so a narrower (integer
 * promoted) or wider operand can neither overflow a signed int nor leak
 * bits above bit 31 into the result.  Both shift counts are reduced
 * modulo 32, which makes k == 0 an identity instead of an undefined
 * shift by the full width of the type.  For a uint32_t operand and
 * 1 <= k <= 31 the result is the same as the classic
 * "(x << k) | (x >> (32 - k))" form.
 *
 * Both arguments are evaluated twice: do not pass expressions with side
 * effects.
 */
#define rot(x, k) \
	((((uint32_t) (x)) << ((k) & 31)) | (((uint32_t) (x)) >> (-(k) & 31)))

/*
 * mix(a, b, c) -- scramble the three hash state words in place.
 *
 * Runs six rounds; each round subtracts one word from another, xors in a
 * rotated copy of the subtracted word (via rot()), then adds a second word
 * into the one that was just rotated.  The order of the statements matters.
 *
 * a, b and c must be modifiable lvalues and are referenced several times.
 * Wrapped in do { } while (0) so the macro behaves as a single statement.
 */
#define mix(a, b, c) \
	do { \
		a -= c; \
		a ^= rot(c, 4); \
		c += b; \
		b -= a; \
		b ^= rot(a, 6); \
		a += c; \
		c -= b; \
		c ^= rot(b, 8); \
		b += a; \
		a -= c; \
		a ^= rot(c, 16); \
		c += b; \
		b -= a; \
		b ^= rot(a, 19); \
		a += c; \
		c -= b; \
		c ^= rot(b, 4); \
		b += a; \
	} while (0)

/*
 * final(a, b, c) -- last scrambling pass over the three hash state words.
 *
 * Runs seven xor / subtract-rotated steps (via rot()) that update a, b and
 * c in place; hashlittle() returns c after this pass.  The order of the
 * statements matters.
 *
 * a, b and c must be modifiable lvalues and are referenced several times.
 * The body is wrapped in do { } while (0) so that "final(a, b, c);" is
 * exactly one statement and can be used safely as the unbraced body of an
 * if/else.
 */
#define final(a, b, c) \
do { \
	c ^= b; c -= rot(b, 14); \
	a ^= c; a -= rot(c, 11); \
	b ^= a; b -= rot(a, 25); \
	c ^= b; c -= rot(b, 16); \
	a ^= c; a -= rot(c,  4); \
	b ^= a; b -= rot(a, 14); \
	c ^= b; c -= rot(b, 24); \
} while (0)

/*
 * HASH_LITTLE_ENDIAN -- 1 when the target is known to be little-endian,
 * 0 otherwise.  hashlittle() only uses its 32-bit and 16-bit read paths
 * when this is 1; with 0 it reads the key one byte at a time.
 *
 * Both BYTE_ORDER and LITTLE_ENDIAN must be defined for the comparison to
 * mean anything: identifiers that are not macros evaluate to 0 inside
 * #if, so comparing two undefined names would silently claim a
 * little-endian target.  When either is missing, fall back to 0.
 */
#if defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)
#define HASH_LITTLE_ENDIAN	1
#else
#define HASH_LITTLE_ENDIAN	0
#endif

/*
 *
 * hashlittle() -- hash a variable-length key into a 32-bit value
 *   key     : the key (a variable-length array of bytes, any alignment)
 *   length  : the length of the key, counting by bytes
 *   initval : can be any 4-byte value
 * Returns a 32-bit value.  Every bit of the key affects every bit of
 * the return value.  Two keys differing by one or two bits will have
 * totally different hash values.
 *
 * A zero-length key returns the initial state word
 * (0xdeadbeef + initval) without any mixing.
 *
 * The best hash table sizes are powers of 2.  There is no need to do
 * mod a prime (mod is sooo slow!).  If you need less than 32 bits,
 * use a bitmask.  For example, if you need only 10 bits, do
 *   h = (h & hashmask(10));
 * In which case, the hash table should have hashsize(10) elements.
 * (hashmask() and hashsize() are helpers of the original lookup3.c.)
 *
 * If you are hashing n strings (uint8_t **)k, do it like this:
 *   for (i = 0, h = 0; i < n; ++i) h = hashlittle(k[i], len[i], h);
 *
 * By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
 * code any way you wish, private, educational, or commercial.  It's free.
 *
 * Use for hash table lookup, or anything where one collision in 2^^32 is
 * acceptable.  Do NOT use for cryptographic purposes.
 *
 * Three read strategies are used, picked from the key's address:
 *   - 4-byte aligned key on a little-endian target: 32-bit reads,
 *   - 2-byte aligned key on a little-endian target: 16-bit reads,
 *   - anything else: one byte at a time.
 * All three assemble the same little-endian 32-bit words.
 *
 * NOTE(review): the 32-bit and 16-bit paths read the key through
 * uint32_t / uint16_t pointers; this relies on the build tolerating such
 * aliasing of the caller's object -- confirm against the compiler flags.
 */
static
uint32_t hashlittle(const void *key, size_t length, uint32_t initval)
{
	uint32_t a, b, c;	/* internal state */
	const uintptr_t addr = (uintptr_t) key;	/* only used to test alignment */

	/* Set up the internal state */
	a = b = c = 0xdeadbeef + ((uint32_t) length) + initval;

	if (HASH_LITTLE_ENDIAN && ((addr & 0x3) == 0)) {
		const uint32_t *k = (const uint32_t *) key;	/* read 32-bit chunks */
		const uint8_t *k8;

		/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
		while (length > 12) {
			a += k[0];
			b += k[1];
			c += k[2];
			mix(a, b, c);
			length -= 12;
			k += 3;
		}

		/*----------------------------- handle the last (probably partial) block */
		/*
		 * Complete words are read as 32-bit values; the 1 to 3 trailing
		 * bytes of a partial word are read individually.  Reading the whole
		 * trailing word and masking it would give the same value, but it
		 * accesses bytes past the end of the key, which is undefined
		 * behaviour and is reported by memory checkers.
		 */
		k8 = (const uint8_t *) k;
		switch (length) {
		case 12:
			c += k[2];
			b += k[1];
			a += k[0];
			break;
		case 11:
			c += ((uint32_t) k8[10]) << 16;
			/* fall through */
		case 10:
			c += ((uint32_t) k8[9]) << 8;
			/* fall through */
		case 9:
			c += k8[8];
			/* fall through */
		case 8:
			b += k[1];
			a += k[0];
			break;
		case 7:
			b += ((uint32_t) k8[6]) << 16;
			/* fall through */
		case 6:
			b += ((uint32_t) k8[5]) << 8;
			/* fall through */
		case 5:
			b += k8[4];
			/* fall through */
		case 4:
			a += k[0];
			break;
		case 3:
			a += ((uint32_t) k8[2]) << 16;
			/* fall through */
		case 2:
			a += ((uint32_t) k8[1]) << 8;
			/* fall through */
		case 1:
			a += k8[0];
			break;
		case 0:
			return c;	/* zero length strings require no mixing */
		default:
			break;		/* not reached: length <= 12 here */
		}

	} else if (HASH_LITTLE_ENDIAN && ((addr & 0x1) == 0)) {
		const uint16_t *k = (const uint16_t *) key;	/* read 16-bit chunks */
		const uint8_t *k8;

		/*--------------- all but last block: aligned reads and different mixing */
		while (length > 12) {
			a += k[0] + (((uint32_t) k[1]) << 16);
			b += k[2] + (((uint32_t) k[3]) << 16);
			c += k[4] + (((uint32_t) k[5]) << 16);
			mix(a, b, c);
			length -= 12;
			k += 6;
		}

		/*----------------------------- handle the last (probably partial) block */
		k8 = (const uint8_t *) k;
		switch (length) {
		case 12:
			c += k[4] + (((uint32_t) k[5]) << 16);
			b += k[2] + (((uint32_t) k[3]) << 16);
			a += k[0] + (((uint32_t) k[1]) << 16);
			break;
		case 11:
			c += ((uint32_t) k8[10]) << 16;
			/* fall through */
		case 10:
			c += k[4];
			b += k[2] + (((uint32_t) k[3]) << 16);
			a += k[0] + (((uint32_t) k[1]) << 16);
			break;
		case 9:
			c += k8[8];
			/* fall through */
		case 8:
			b += k[2] + (((uint32_t) k[3]) << 16);
			a += k[0] + (((uint32_t) k[1]) << 16);
			break;
		case 7:
			b += ((uint32_t) k8[6]) << 16;
			/* fall through */
		case 6:
			b += k[2];
			a += k[0] + (((uint32_t) k[1]) << 16);
			break;
		case 5:
			b += k8[4];
			/* fall through */
		case 4:
			a += k[0] + (((uint32_t) k[1]) << 16);
			break;
		case 3:
			a += ((uint32_t) k8[2]) << 16;
			/* fall through */
		case 2:
			a += k[0];
			break;
		case 1:
			a += k8[0];
			break;
		case 0:
			return c;	/* zero length requires no mixing */
		default:
			break;		/* not reached: length <= 12 here */
		}

	} else {	/* need to read the key one byte at a time */
		const uint8_t *k = (const uint8_t *) key;

		/*--------------- all but the last block: affect some 32 bits of (a, b, c) */
		while (length > 12) {
			a += k[0];
			a += ((uint32_t) k[1]) << 8;
			a += ((uint32_t) k[2]) << 16;
			a += ((uint32_t) k[3]) << 24;
			b += k[4];
			b += ((uint32_t) k[5]) << 8;
			b += ((uint32_t) k[6]) << 16;
			b += ((uint32_t) k[7]) << 24;
			c += k[8];
			c += ((uint32_t) k[9]) << 8;
			c += ((uint32_t) k[10]) << 16;
			c += ((uint32_t) k[11]) << 24;
			mix(a, b, c);
			length -= 12;
			k += 12;
		}

		/*-------------------------------- last block: affect all 32 bits of (c) */
		switch (length) {	/* cases 12 down to 1 all fall through */
		case 12:
			c += ((uint32_t) k[11]) << 24;
			/* fall through */
		case 11:
			c += ((uint32_t) k[10]) << 16;
			/* fall through */
		case 10:
			c += ((uint32_t) k[9]) << 8;
			/* fall through */
		case 9:
			c += k[8];
			/* fall through */
		case 8:
			b += ((uint32_t) k[7]) << 24;
			/* fall through */
		case 7:
			b += ((uint32_t) k[6]) << 16;
			/* fall through */
		case 6:
			b += ((uint32_t) k[5]) << 8;
			/* fall through */
		case 5:
			b += k[4];
			/* fall through */
		case 4:
			a += ((uint32_t) k[3]) << 24;
			/* fall through */
		case 3:
			a += ((uint32_t) k[2]) << 16;
			/* fall through */
		case 2:
			a += ((uint32_t) k[1]) << 8;
			/* fall through */
		case 1:
			a += k[0];
			break;
		case 0:
			return c;	/* zero length requires no mixing */
		default:
			break;		/* not reached: length <= 12 here */
		}
	}

	final(a, b, c);
	return c;
}

/*
 * jhash() -- hash `length` bytes starting at `key`, seeded with `seed`.
 *
 * Thin wrapper: forwards its three arguments unchanged to hashlittle()
 * and returns that function's 32-bit result.
 */
static inline
uint32_t jhash(const void *key, size_t length, uint32_t seed)
{
	return hashlittle(key, length, seed);
}
Commit	Line	Data
	1	/*
	2	* Copyright (C) 2011 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
	3	*
	4	* This library is free software; you can redistribute it and/or
	5	* modify it under the terms of the GNU Lesser General Public
	6	* License as published by the Free Software Foundation;
	7	* version 2.1 of the License.
	8	*
	9	* This library is distributed in the hope that it will be useful,
	10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	12	* Lesser General Public License for more details.
	13	*
	14	* You should have received a copy of the GNU Lesser General Public
	15	* License along with this library; if not, write to the Free Software
	16	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	17	*/
	18
	19	#include <urcu/compiler.h>
	20	#include <lttng/ust-endian.h>
	21
	22	/*
	23	* Hash function
	24	* Source: http://burtleburtle.net/bob/c/lookup3.c
	25	* Originally Public Domain
	26	*/
	27
	28	#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
	29
	30	#define mix(a, b, c) \
	31	do { \
	32	a -= c; a ^= rot(c, 4); c += b; \
	33	b -= a; b ^= rot(a, 6); a += c; \
	34	c -= b; c ^= rot(b, 8); b += a; \
	35	a -= c; a ^= rot(c, 16); c += b; \
	36	b -= a; b ^= rot(a, 19); a += c; \
	37	c -= b; c ^= rot(b, 4); b += a; \
	38	} while (0)
	39
	40	#define final(a, b, c) \
	41	{ \
	42	c ^= b; c -= rot(b, 14); \
	43	a ^= c; a -= rot(c, 11); \
	44	b ^= a; b -= rot(a, 25); \
	45	c ^= b; c -= rot(b, 16); \
	46	a ^= c; a -= rot(c, 4);\
	47	b ^= a; b -= rot(a, 14); \
	48	c ^= b; c -= rot(b, 24); \
	49	}
	50
	51	#if (BYTE_ORDER == LITTLE_ENDIAN)
	52	#define HASH_LITTLE_ENDIAN 1
	53	#else
	54	#define HASH_LITTLE_ENDIAN 0
	55	#endif
	56
	57	/*
	58	*
	59	* hashlittle() -- hash a variable-length key into a 32-bit value
	60	* k : the key (the unaligned variable-length array of bytes)
	61	* length : the length of the key, counting by bytes
	62	* initval : can be any 4-byte value
	63	* Returns a 32-bit value. Every bit of the key affects every bit of
	64	* the return value. Two keys differing by one or two bits will have
	65	* totally different hash values.
	66	*
	67	* The best hash table sizes are powers of 2. There is no need to do
	68	* mod a prime (mod is sooo slow!). If you need less than 32 bits,
	69	* use a bitmask. For example, if you need only 10 bits, do
	70	* h = (h & hashmask(10));
	71	* In which case, the hash table should have hashsize(10) elements.
	72	*
	73	* If you are hashing n strings (uint8_t **)k, do it like this:
	74	* for (i = 0, h = 0; i < n; ++i) h = hashlittle(k[i], len[i], h);
	75	*
	76	* By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
	77	* code any way you wish, private, educational, or commercial. It's free.
	78	*
	79	* Use for hash table lookup, or anything where one collision in 2^^32 is
	80	* acceptable. Do NOT use for cryptographic purposes.
	81	*/
	82	static
	83	uint32_t hashlittle(const void *key, size_t length, uint32_t initval)
	84	{
	85	uint32_t a, b, c; /* internal state */
	86	union {
	87	const void *ptr;
	88	size_t i;
	89	} u;
	90
	91	/* Set up the internal state */
	92	a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
	93
	94	u.ptr = key;
	95	if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
	96	const uint32_t *k = (const uint32_t *) key; /* read 32-bit chunks */
	97
	98	/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
	99	while (length > 12) {
	100	a += k[0];
	101	b += k[1];
	102	c += k[2];
	103	mix(a, b, c);
	104	length -= 12;
	105	k += 3;
	106	}
	107
	108	/*----------------------------- handle the last (probably partial) block */
	109	/*
	110	* "k[2]&0xffffff" actually reads beyond the end of the string, but
	111	* then masks off the part it's not allowed to read. Because the
	112	* string is aligned, the masked-off tail is in the same word as the
	113	* rest of the string. Every machine with memory protection I've seen
	114	* does it on word boundaries, so is OK with this. But VALGRIND will
	115	* still catch it and complain. The masking trick does make the hash
	116	* noticeably faster for short strings (like English words).
	117	*/
	118	#ifndef VALGRIND
	119
	120	switch (length) {
	121	case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
	122	case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
	123	case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
	124	case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
	125	case 8 : b+=k[1]; a+=k[0]; break;
	126	case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
	127	case 6 : b+=k[1]&0xffff; a+=k[0]; break;
	128	case 5 : b+=k[1]&0xff; a+=k[0]; break;
	129	case 4 : a+=k[0]; break;
	130	case 3 : a+=k[0]&0xffffff; break;
	131	case 2 : a+=k[0]&0xffff; break;
	132	case 1 : a+=k[0]&0xff; break;
	133	case 0 : return c; /* zero length strings require no mixing */
	134	}
	135
	136	#else /* make valgrind happy */
	137	{
	138	const uint8_t *k8;
	139
	140	k8 = (const uint8_t *) k;
	141	switch (length) {
	142	case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
	143	case 11: c+=((uint32_t) k8[10])<<16; /* fall through */
	144	case 10: c+=((uint32_t) k8[9])<<8; /* fall through */
	145	case 9 : c+=k8[8]; /* fall through */
	146	case 8 : b+=k[1]; a+=k[0]; break;
	147	case 7 : b+=((uint32_t) k8[6])<<16; /* fall through */
	148	case 6 : b+=((uint32_t) k8[5])<<8; /* fall through */
	149	case 5 : b+=k8[4]; /* fall through */
	150	case 4 : a+=k[0]; break;
	151	case 3 : a+=((uint32_t) k8[2])<<16; /* fall through */
	152	case 2 : a+=((uint32_t) k8[1])<<8; /* fall through */
	153	case 1 : a+=k8[0]; break;
	154	case 0 : return c;
	155	}
	156	}
	157	#endif /* !valgrind */
	158
	159	} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
	160	const uint16_t *k = (const uint16_t *) key; /* read 16-bit chunks */
	161	const uint8_t *k8;
	162
	163	/*--------------- all but last block: aligned reads and different mixing */
	164	while (length > 12)
	165	{
	166	a += k[0] + (((uint32_t) k[1])<<16);
	167	b += k[2] + (((uint32_t) k[3])<<16);
	168	c += k[4] + (((uint32_t) k[5])<<16);
	169	mix(a, b, c);
	170	length -= 12;
	171	k += 6;
	172	}
	173
	174	/*----------------------------- handle the last (probably partial) block */
	175	k8 = (const uint8_t *) k;
	176	switch(length)
	177	{
	178	case 12: c+=k[4]+(((uint32_t) k[5])<<16);
	179	b+=k[2]+(((uint32_t) k[3])<<16);
	180	a+=k[0]+(((uint32_t) k[1])<<16);
	181	break;
	182	case 11: c+=((uint32_t) k8[10])<<16; /* fall through */
	183	case 10: c+=k[4];
	184	b+=k[2]+(((uint32_t) k[3])<<16);
	185	a+=k[0]+(((uint32_t) k[1])<<16);
	186	break;
	187	case 9 : c+=k8[8]; /* fall through */
	188	case 8 : b+=k[2]+(((uint32_t) k[3])<<16);
	189	a+=k[0]+(((uint32_t) k[1])<<16);
	190	break;
	191	case 7 : b+=((uint32_t) k8[6])<<16; /* fall through */
	192	case 6 : b+=k[2];
	193	a+=k[0]+(((uint32_t) k[1])<<16);
	194	break;
	195	case 5 : b+=k8[4]; /* fall through */
	196	case 4 : a+=k[0]+(((uint32_t) k[1])<<16);
	197	break;
	198	case 3 : a+=((uint32_t) k8[2])<<16; /* fall through */
	199	case 2 : a+=k[0];
	200	break;
	201	case 1 : a+=k8[0];
	202	break;
	203	case 0 : return c; /* zero length requires no mixing */
	204	}
	205
	206	} else { /* need to read the key one byte at a time */
	207	const uint8_t *k = (const uint8_t *)key;
	208
	209	/*--------------- all but the last block: affect some 32 bits of (a, b, c) */
	210	while (length > 12) {
	211	a += k[0];
	212	a += ((uint32_t) k[1])<<8;
	213	a += ((uint32_t) k[2])<<16;
	214	a += ((uint32_t) k[3])<<24;
	215	b += k[4];
	216	b += ((uint32_t) k[5])<<8;
	217	b += ((uint32_t) k[6])<<16;
	218	b += ((uint32_t) k[7])<<24;
	219	c += k[8];
	220	c += ((uint32_t) k[9])<<8;
	221	c += ((uint32_t) k[10])<<16;
	222	c += ((uint32_t) k[11])<<24;
	223	mix(a,b,c);
	224	length -= 12;
	225	k += 12;
	226	}
	227
	228	/*-------------------------------- last block: affect all 32 bits of (c) */
	229	switch (length) { /* all the case statements fall through */
	230	case 12: c+=((uint32_t) k[11])<<24;
	231	case 11: c+=((uint32_t) k[10])<<16;
	232	case 10: c+=((uint32_t) k[9])<<8;
	233	case 9 : c+=k[8];
	234	case 8 : b+=((uint32_t) k[7])<<24;
	235	case 7 : b+=((uint32_t) k[6])<<16;
	236	case 6 : b+=((uint32_t) k[5])<<8;
	237	case 5 : b+=k[4];
	238	case 4 : a+=((uint32_t) k[3])<<24;
	239	case 3 : a+=((uint32_t) k[2])<<16;
	240	case 2 : a+=((uint32_t) k[1])<<8;
	241	case 1 : a+=k[0];
	242	break;
	243	case 0 : return c;
	244	}
	245	}
	246
	247	final(a, b, c);
	248	return c;
	249	}
	250
	251	static inline
	252	uint32_t jhash(const void *key, size_t length, uint32_t seed)
	253	{
	254	return hashlittle(key, length, seed);
	255	}