From: David Goulet Date: Fri, 16 Dec 2011 22:14:21 +0000 (-0500) Subject: Add and support the new hash table library X-Git-Tag: v2.0-pre17~34 X-Git-Url: https://git.lttng.org./?a=commitdiff_plain;h=bec399405a4667411ae06bbbcbed678e42e93a30;p=lttng-tools.git Add and support the new hash table library liblttng-ht/ is the library introduced for hash table management. This library uses the URCU ht-shrink branch (not yet upstream), making the library completely lockless. Import git head URCU hashtables at commit: 91a75cc579698814e47877cc8927fcae1f573739 Note that the urcu hash table files are copied from the urcu git tree to this git tree while waiting for them to become upstream and stable. Old hash table files are removed from libcommon. Signed-off-by: David Goulet --- diff --git a/Makefile.am b/Makefile.am index b744596b1..3b6344fa6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -3,6 +3,7 @@ ACLOCAL_AMFLAGS = -I config SUBDIRS = common \ liblttng-sessiond-comm \ libkernelctl \ + liblttng-ht \ liblttng-kconsumer \ liblttng-ustconsumer \ liblttng-consumer \ diff --git a/common/Makefile.am b/common/Makefile.am index cb0b66c31..c855317fa 100644 --- a/common/Makefile.am +++ b/common/Makefile.am @@ -4,8 +4,4 @@ AM_CFLAGS = -fno-strict-aliasing noinst_LTLIBRARIES = libcommon.la -libcommon_la_SOURCES = hashtable.c hashtable.h \ - hashtable/rculfhash.c \ - hashtable/rculfhash.h \ - hashtable/hash.c hashtable/hash.h \ - runas.c +libcommon_la_SOURCES = runas.c diff --git a/common/hashtable.c b/common/hashtable.c deleted file mode 100644 index 2289f7adb..000000000 --- a/common/hashtable.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (C) 2011 - David Goulet - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; only version 2 of the License. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307, USA.
- */ - -#include - -#include - -#include "hashtable.h" -#include "hashtable/rculfhash.h" -#include "hashtable/hash.h" - -struct cds_lfht *hashtable_new(unsigned long size) -{ - if (size == 0) { - size = DEFAULT_HT_SIZE; - } - - return cds_lfht_new(hash_key, hash_compare_key, 0x42UL, - size, size, CDS_LFHT_AUTO_RESIZE, NULL); -} - -struct cds_lfht *hashtable_new_str(unsigned long size) -{ - if (size == 0) { - size = DEFAULT_HT_SIZE; - } - - return cds_lfht_new(hash_key_str, hash_compare_key_str, 0x42UL, - size, size, CDS_LFHT_AUTO_RESIZE, NULL); -} - -struct cds_lfht_node *hashtable_iter_get_node(struct cds_lfht_iter *iter) -{ - /* Safety net */ - if (iter == NULL) { - return NULL; - } - - return cds_lfht_iter_get_node(iter); -} - -struct cds_lfht_node *hashtable_lookup(struct cds_lfht *ht, void *key, - size_t key_len, struct cds_lfht_iter *iter) -{ - /* Safety net */ - if (ht == NULL || iter == NULL || key == NULL) { - return NULL; - } - - cds_lfht_lookup(ht, key, key_len, iter); - - return hashtable_iter_get_node(iter); -} - -void hashtable_get_first(struct cds_lfht *ht, struct cds_lfht_iter *iter) -{ - cds_lfht_first(ht, iter); -} - -void hashtable_get_next(struct cds_lfht *ht, struct cds_lfht_iter *iter) -{ - cds_lfht_next(ht, iter); -} - -void hashtable_add_unique(struct cds_lfht *ht, struct cds_lfht_node *node) -{ - cds_lfht_add_unique(ht, node); -} - -void hashtable_node_init(struct cds_lfht_node *node, void *key, - size_t key_len) -{ - cds_lfht_node_init(node, key, key_len); -} - -int hashtable_del(struct cds_lfht *ht, struct cds_lfht_iter *iter) -{ - /* Safety net */ - if (ht == NULL || iter == NULL) { - return -1; - } - - return cds_lfht_del(ht, iter); -} - -unsigned long hashtable_get_count(struct cds_lfht *ht) -{ - long ab, aa; - unsigned long count, removed; - - cds_lfht_count_nodes(ht, &ab, &count, &removed, &aa); - - return count; -} - -int hashtable_destroy(struct cds_lfht *ht) -{ - return cds_lfht_destroy(ht, NULL); -} diff --git a/common/hashtable.h b/common/hashtable.h deleted file mode 100644 index b7f7314f2..000000000 --- a/common/hashtable.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2011 - David Goulet - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; only version 2 of the License. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307, USA. 
- */ - -#ifndef _LTT_HASHTABLE_H -#define _LTT_HASHTABLE_H - -#include -#include "hashtable/rculfhash.h" - -struct cds_lfht *hashtable_new(unsigned long size); -struct cds_lfht *hashtable_new_str(unsigned long size); - -struct cds_lfht_node *hashtable_iter_get_node(struct cds_lfht_iter *iter); -struct cds_lfht_node *hashtable_lookup(struct cds_lfht *ht, void *key, - size_t key_len, struct cds_lfht_iter *iter); - -void hashtable_get_first(struct cds_lfht *ht, struct cds_lfht_iter *iter); -void hashtable_get_next(struct cds_lfht *ht, struct cds_lfht_iter *iter); -void hashtable_add_unique(struct cds_lfht *ht, struct cds_lfht_node *node); -void hashtable_node_init(struct cds_lfht_node *node, - void *key, size_t key_len); - -int hashtable_del(struct cds_lfht *ht, struct cds_lfht_iter *iter); -unsigned long hashtable_get_count(struct cds_lfht *ht); -int hashtable_destroy(struct cds_lfht *ht); - -#endif /* _LTT_HASHTABLE_H */ diff --git a/common/hashtable/hash.c b/common/hashtable/hash.c deleted file mode 100644 index 12f76d834..000000000 --- a/common/hashtable/hash.c +++ /dev/null @@ -1,504 +0,0 @@ -/* - * Copyright (C) - Bob Jenkins, May 2006, Public Domain. - * Copyright (C) 2011 - David Goulet - * Copyright (C) 2011 - Mathieu Desnoyers - * - * These are functions for producing 32-bit hashes for hash table lookup. - * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() are - * externally useful functions. Routines to test the hash are included if - * SELF_TEST is defined. You can use this free for any purpose. It's in the - * public domain. It has no warranty. - * - * You probably want to use hashlittle(). hashlittle() and hashbig() hash byte - * arrays. hashlittle() is is faster than hashbig() on little-endian machines. - * Intel and AMD are little-endian machines. On second thought, you probably - * want hashlittle2(), which is identical to hashlittle() except it returns two - * 32-bit hashes for the price of one. You could implement hashbig2() if you - * wanted but I haven't bothered here. - * - * If you want to find a hash of, say, exactly 7 integers, do - * a = i1; b = i2; c = i3; - * mix(a,b,c); - * a += i4; b += i5; c += i6; - * mix(a,b,c); - * a += i7; - * final(a,b,c); - * then use c as the hash value. If you have a variable length array of - * 4-byte integers to hash, use hashword(). If you have a byte array (like - * a character string), use hashlittle(). If you have several byte arrays, or - * a mix of things, see the comments above hashlittle(). - * - * Why is this so big? I read 12 bytes at a time into 3 4-byte integers, then - * mix those integers. This is fast (you can do a lot more thorough mixing - * with 12*3 instructions on 3 integers than you can with 3 instructions on 1 - * byte), but shoehorning those bytes into integers efficiently is messy. - */ - -#include /* defines printf for tests */ -#include /* defines time_t for timings in the test */ -#include /* defines uint32_t etc */ -#include /* attempt to define endianness */ -#include /* attempt to define endianness */ -#include -#include -#include - -/* - * My best guess at if you are big-endian or little-endian. This may - * need adjustment. 
- */ -#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \ - __BYTE_ORDER == __LITTLE_ENDIAN) || \ - (defined(i386) || defined(__i386__) || defined(__i486__) || \ - defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL)) -# define HASH_LITTLE_ENDIAN 1 -# define HASH_BIG_ENDIAN 0 -#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \ - __BYTE_ORDER == __BIG_ENDIAN) || \ - (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel)) -# define HASH_LITTLE_ENDIAN 0 -# define HASH_BIG_ENDIAN 1 -#else -# define HASH_LITTLE_ENDIAN 0 -# define HASH_BIG_ENDIAN 0 -#endif - -#define hashsize(n) ((uint32_t)1<<(n)) -#define hashmask(n) (hashsize(n)-1) -#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) - -/* - * mix -- mix 3 32-bit values reversibly. - * - * This is reversible, so any information in (a,b,c) before mix() is - * still in (a,b,c) after mix(). - * - * If four pairs of (a,b,c) inputs are run through mix(), or through - * mix() in reverse, there are at least 32 bits of the output that - * are sometimes the same for one pair and different for another pair. - * This was tested for: - * * pairs that differed by one bit, by two bits, in any combination - * of top bits of (a,b,c), or in any combination of bottom bits of - * (a,b,c). - * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed - * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as - * is commonly produced by subtraction) look like a single 1-bit - * difference. - * * the base values were pseudorandom, all zero but one bit set, or - * all zero plus a counter that starts at zero. - * - * Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that - * satisfy this are - * 4 6 8 16 19 4 - * 9 15 3 18 27 15 - * 14 9 3 7 17 3 - * Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing - * for "differ" defined as + with a one-bit base and a two-bit delta. I - * used http://burtleburtle.net/bob/hash/avalanche.html to choose - * the operations, constants, and arrangements of the variables. - * - * This does not achieve avalanche. There are input bits of (a,b,c) - * that fail to affect some output bits of (a,b,c), especially of a. The - * most thoroughly mixed value is c, but it doesn't really even achieve - * avalanche in c. - * - * This allows some parallelism. Read-after-writes are good at doubling - * the number of bits affected, so the goal of mixing pulls in the opposite - * direction as the goal of parallelism. I did what I could. Rotates - * seem to cost as much as shifts on every machine I could lay my hands - * on, and rotates are much kinder to the top and bottom bits, so I used - * rotates. - */ -#define mix(a,b,c) \ -{ \ - a -= c; a ^= rot(c, 4); c += b; \ - b -= a; b ^= rot(a, 6); a += c; \ - c -= b; c ^= rot(b, 8); b += a; \ - a -= c; a ^= rot(c,16); c += b; \ - b -= a; b ^= rot(a,19); a += c; \ - c -= b; c ^= rot(b, 4); b += a; \ -} - -/* - * final -- final mixing of 3 32-bit values (a,b,c) into c - * - * Pairs of (a,b,c) values differing in only a few bits will usually - * produce values of c that look totally different. This was tested for - * * pairs that differed by one bit, by two bits, in any combination - * of top bits of (a,b,c), or in any combination of bottom bits of - * (a,b,c). - * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed - * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as - * is commonly produced by subtraction) look like a single 1-bit - * difference. 
- * * the base values were pseudorandom, all zero but one bit set, or - * all zero plus a counter that starts at zero. - * - * These constants passed: - * 14 11 25 16 4 14 24 - * 12 14 25 16 4 14 24 - * and these came close: - * 4 8 15 26 3 22 24 - * 10 8 15 26 3 22 24 - * 11 8 15 26 3 22 24 - */ -#define final(a,b,c) \ -{ \ - c ^= b; c -= rot(b,14); \ - a ^= c; a -= rot(c,11); \ - b ^= a; b -= rot(a,25); \ - c ^= b; c -= rot(b,16); \ - a ^= c; a -= rot(c,4); \ - b ^= a; b -= rot(a,14); \ - c ^= b; c -= rot(b,24); \ -} - -static __attribute__((unused)) -uint32_t hashword( - const uint32_t *k, /* the key, an array of uint32_t values */ - size_t length, /* the length of the key, in uint32_ts */ - uint32_t initval) /* the previous hash, or an arbitrary value */ -{ - uint32_t a, b, c; - - /* Set up the internal state */ - a = b = c = 0xdeadbeef + (((uint32_t) length) << 2) + initval; - - /*----------------------------------------- handle most of the key */ - while (length > 3) { - a += k[0]; - b += k[1]; - c += k[2]; - mix(a, b, c); - length -= 3; - k += 3; - } - - /*----------------------------------- handle the last 3 uint32_t's */ - switch (length) { /* all the case statements fall through */ - case 3: c += k[2]; - case 2: b += k[1]; - case 1: a += k[0]; - final(a, b, c); - case 0: /* case 0: nothing left to add */ - break; - } - /*---------------------------------------------- report the result */ - return c; -} - - -/* - * hashword2() -- same as hashword(), but take two seeds and return two 32-bit - * values. pc and pb must both be nonnull, and *pc and *pb must both be - * initialized with seeds. If you pass in (*pb)==0, the output (*pc) will be - * the same as the return value from hashword(). - */ -static __attribute__((unused)) -void hashword2(const uint32_t *k, size_t length, - uint32_t *pc, uint32_t *pb) -{ - uint32_t a, b, c; - - /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t) (length << 2)) + *pc; - c += *pb; - - while (length > 3) { - a += k[0]; - b += k[1]; - c += k[2]; - mix(a, b, c); - length -= 3; - k += 3; - } - - switch (length) { - case 3 : - c += k[2]; - case 2 : - b += k[1]; - case 1 : - a += k[0]; - final(a, b, c); - case 0: /* case 0: nothing left to add */ - break; - } - - *pc = c; - *pb = b; -} - -/* - * hashlittle() -- hash a variable-length key into a 32-bit value - * k : the key (the unaligned variable-length array of bytes) - * length : the length of the key, counting by bytes - * initval : can be any 4-byte value - * Returns a 32-bit value. Every bit of the key affects every bit of - * the return value. Two keys differing by one or two bits will have - * totally different hash values. - * - * The best hash table sizes are powers of 2. There is no need to do - * mod a prime (mod is sooo slow!). If you need less than 32 bits, - * use a bitmask. For example, if you need only 10 bits, do - * h = (h & hashmask(10)); - * In which case, the hash table should have hashsize(10) elements. - * - * If you are hashing n strings (uint8_t **)k, do it like this: - * for (i=0, h=0; i 12) { - a += k[0]; - b += k[1]; - c += k[2]; - mix(a,b,c); - length -= 12; - k += 3; - } - - /* - * "k[2]&0xffffff" actually reads beyond the end of the string, but - * then masks off the part it's not allowed to read. Because the - * string is aligned, the masked-off tail is in the same word as the - * rest of the string. Every machine with memory protection I've seen - * does it on word boundaries, so is OK with this. But VALGRIND will - * still catch it and complain. 
The masking trick does make the hash - * noticably faster for short strings (like English words). - */ -#ifndef VALGRIND - - switch (length) { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; - case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; - case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=k[1]&0xffffff; a+=k[0]; break; - case 6 : b+=k[1]&0xffff; a+=k[0]; break; - case 5 : b+=k[1]&0xff; a+=k[0]; break; - case 4 : a+=k[0]; break; - case 3 : a+=k[0]&0xffffff; break; - case 2 : a+=k[0]&0xffff; break; - case 1 : a+=k[0]&0xff; break; - case 0 : return c; /* zero length strings require no mixing */ - } -#else /* make valgrind happy */ - const uint8_t *k8; - - k8 = (const uint8_t *)k; - switch (length) { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ - case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ - case 9 : c+=k8[8]; /* fall through */ - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ - case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ - case 5 : b+=k8[4]; /* fall through */ - case 4 : a+=k[0]; break; - case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ - case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ - case 1 : a+=k8[0]; break; - case 0 : return c; - } -#endif /* !valgrind */ - } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { - const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ - const uint8_t *k8; - - /*--------------- all but last block: aligned reads and different mixing */ - while (length > 12) { - a += k[0] + (((uint32_t)k[1])<<16); - b += k[2] + (((uint32_t)k[3])<<16); - c += k[4] + (((uint32_t)k[5])<<16); - mix(a,b,c); - length -= 12; - k += 6; - } - - k8 = (const uint8_t *)k; - switch (length) { - case 12: - c+=k[4]+(((uint32_t)k[5])<<16); - b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 11: - c+=((uint32_t)k8[10])<<16; /* fall through */ - case 10: - c+=k[4]; - b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 9: - c+=k8[8]; /* fall through */ - case 8: - b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 7: - b+=((uint32_t)k8[6])<<16; /* fall through */ - case 6: - b+=k[2]; - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 5: - b+=k8[4]; /* fall through */ - case 4: - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 3: - a+=((uint32_t)k8[2])<<16; /* fall through */ - case 2: - a+=k[0]; - break; - case 1: - a+=k8[0]; - break; - case 0: - return c; /* zero length requires no mixing */ - } - - } else { /* need to read the key one byte at a time */ - const uint8_t *k = (const uint8_t *)key; - - while (length > 12) { - a += k[0]; - a += ((uint32_t)k[1])<<8; - a += ((uint32_t)k[2])<<16; - a += ((uint32_t)k[3])<<24; - b += k[4]; - b += ((uint32_t)k[5])<<8; - b += ((uint32_t)k[6])<<16; - b += ((uint32_t)k[7])<<24; - c += k[8]; - c += ((uint32_t)k[9])<<8; - c += ((uint32_t)k[10])<<16; - c += ((uint32_t)k[11])<<24; - mix(a,b,c); - length -= 12; - k += 12; - } - - switch(length) { /* all the case statements fall through */ - case 12: c+=((uint32_t)k[11])<<24; - case 11: c+=((uint32_t)k[10])<<16; - case 10: c+=((uint32_t)k[9])<<8; - case 9: c+=k[8]; - case 8: b+=((uint32_t)k[7])<<24; - case 7: b+=((uint32_t)k[6])<<16; - case 6: b+=((uint32_t)k[5])<<8; - case 5: b+=k[4]; - case 4: a+=((uint32_t)k[3])<<24; - case 3: a+=((uint32_t)k[2])<<16; - case 2: 
a+=((uint32_t)k[1])<<8; - case 1: - a+=k[0]; - break; - case 0: - return c; - } - } - - final(a,b,c); - return c; -} - -#if (CAA_BITS_PER_LONG == 64) -/* - * Hash function for number value. - */ -unsigned long hash_key(void *_key, size_t length, unsigned long seed) -{ - union { - uint64_t v64; - uint32_t v32[2]; - } v; - union { - uint64_t v64; - uint32_t v32[2]; - } key; - - assert(length == sizeof(unsigned long)); - v.v64 = (uint64_t) seed; - key.v64 = (uint64_t) _key; - hashword2(key.v32, 2, &v.v32[0], &v.v32[1]); - return v.v64; -} -#else -/* - * Hash function for number value. - */ -unsigned long hash_key(void *_key, size_t length, unsigned long seed) -{ - uint32_t key = (uint32_t) _key; - - assert(length == sizeof(uint32_t)); - return hashword(&key, 1, seed); -} -#endif - -/* - * Hash function for string. - */ -unsigned long hash_key_str(void *key, size_t length, unsigned long seed) -{ - return hashlittle(key, length, seed); -} - -/* - * Hash function compare for number value. - */ -unsigned long hash_compare_key(void *key1, size_t key1_len, - void *key2, size_t key2_len) -{ - if (key1_len != key2_len) { - return -1; - } - - if (key1 == key2) { - return 0; - } - - return 1; -} - -/* - * Hash compare function for string. - */ -unsigned long hash_compare_key_str(void *key1, size_t key1_len, - void *key2, size_t key2_len) -{ - if (key1_len != key2_len) { - return -1; - } - - if (strncmp(key1, key2, key1_len) == 0) { - return 0; - } - - return 1; -} diff --git a/common/hashtable/hash.h b/common/hashtable/hash.h deleted file mode 100644 index 796920e5c..000000000 --- a/common/hashtable/hash.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (C) 2011 - David Goulet - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; only version 2 of the License. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307, USA. - */ - -#ifndef _LTT_HASH_H -#define _LTT_HASH_H - -unsigned long hash_key(void *_key, size_t length, unsigned long seed); - -unsigned long hash_key_str(void *_key, size_t length, unsigned long seed); - -unsigned long hash_compare_key(void *key1, size_t key1_len, - void *key2, size_t key2_len); - -unsigned long hash_compare_key_str(void *key1, size_t key1_len, - void *key2, size_t key2_len); - -#endif /* _LTT_HASH_H */ diff --git a/common/hashtable/rculfhash.c b/common/hashtable/rculfhash.c deleted file mode 100644 index 2e8315314..000000000 --- a/common/hashtable/rculfhash.c +++ /dev/null @@ -1,1846 +0,0 @@ -/* - * rculfhash.c - * - * Userspace RCU library - Lock-Free Resizable RCU Hash Table - * - * Copyright 2010-2011 - Mathieu Desnoyers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/* - * Based on the following articles: - * - Ori Shalev and Nir Shavit. Split-ordered lists: Lock-free - * extensible hash tables. J. ACM 53, 3 (May 2006), 379-405. - * - Michael, M. M. High performance dynamic lock-free hash tables - * and list-based sets. In Proceedings of the fourteenth annual ACM - * symposium on Parallel algorithms and architectures, ACM Press, - * (2002), 73-82. - * - * Some specificities of this Lock-Free Resizable RCU Hash Table - * implementation: - * - * - RCU read-side critical section allows readers to perform hash - * table lookups and use the returned objects safely by delaying - * memory reclaim of a grace period. - * - Add and remove operations are lock-free, and do not need to - * allocate memory. They need to be executed within RCU read-side - * critical section to ensure the objects they read are valid and to - * deal with the cmpxchg ABA problem. - * - add and add_unique operations are supported. add_unique checks if - * the node key already exists in the hash table. It ensures no key - * duplicata exists. - * - The resize operation executes concurrently with add/remove/lookup. - * - Hash table nodes are contained within a split-ordered list. This - * list is ordered by incrementing reversed-bits-hash value. - * - An index of dummy nodes is kept. These dummy nodes are the hash - * table "buckets", and they are also chained together in the - * split-ordered list, which allows recursive expansion. - * - The resize operation for small tables only allows expanding the hash table. - * It is triggered automatically by detecting long chains in the add - * operation. - * - The resize operation for larger tables (and available through an - * API) allows both expanding and shrinking the hash table. - * - Split-counters are used to keep track of the number of - * nodes within the hash table for automatic resize triggering. - * - Resize operation initiated by long chain detection is executed by a - * call_rcu thread, which keeps lock-freedom of add and remove. - * - Resize operations are protected by a mutex. - * - The removal operation is split in two parts: first, a "removed" - * flag is set in the next pointer within the node to remove. Then, - * a "garbage collection" is performed in the bucket containing the - * removed node (from the start of the bucket up to the removed node). - * All encountered nodes with "removed" flag set in their next - * pointers are removed from the linked-list. If the cmpxchg used for - * removal fails (due to concurrent garbage-collection or concurrent - * add), we retry from the beginning of the bucket. This ensures that - * the node with "removed" flag set is removed from the hash table - * (not visible to lookups anymore) before the RCU read-side critical - * section held across removal ends. Furthermore, this ensures that - * the node with "removed" flag set is removed from the linked-list - * before its memory is reclaimed. 
Only the thread which removal - * successfully set the "removed" flag (with a cmpxchg) into a node's - * next pointer is considered to have succeeded its removal (and thus - * owns the node to reclaim). Because we garbage-collect starting from - * an invariant node (the start-of-bucket dummy node) up to the - * "removed" node (or find a reverse-hash that is higher), we are sure - * that a successful traversal of the chain leads to a chain that is - * present in the linked-list (the start node is never removed) and - * that is does not contain the "removed" node anymore, even if - * concurrent delete/add operations are changing the structure of the - * list concurrently. - * - The add operation performs gargage collection of buckets if it - * encounters nodes with removed flag set in the bucket where it wants - * to add its new node. This ensures lock-freedom of add operation by - * helping the remover unlink nodes from the list rather than to wait - * for it do to so. - * - A RCU "order table" indexed by log2(hash index) is copied and - * expanded by the resize operation. This order table allows finding - * the "dummy node" tables. - * - There is one dummy node table per hash index order. The size of - * each dummy node table is half the number of hashes contained in - * this order (except for order 0). - * - synchronzie_rcu is used to garbage-collect the old dummy node table. - * - The per-order dummy node tables contain a compact version of the - * hash table nodes. These tables are invariant after they are - * populated into the hash table. - * - * Dummy node tables: - * - * hash table hash table the last all dummy node tables - * order size dummy node 0 1 2 3 4 5 6(index) - * table size - * 0 1 1 1 - * 1 2 1 1 1 - * 2 4 2 1 1 2 - * 3 8 4 1 1 2 4 - * 4 16 8 1 1 2 4 8 - * 5 32 16 1 1 2 4 8 16 - * 6 64 32 1 1 2 4 8 16 32 - * - * When growing/shrinking, we only focus on the last dummy node table - * which size is (!order ? 1 : (1 << (order -1))). - * - * Example for growing/shrinking: - * grow hash table from order 5 to 6: init the index=6 dummy node table - * shrink hash table from order 6 to 5: fini the index=6 dummy node table - * - * A bit of ascii art explanation: - * - * Order index is the off-by-one compare to the actual power of 2 because - * we use index 0 to deal with the 0 special-case. - * - * This shows the nodes for a small table ordered by reversed bits: - * - * bits reverse - * 0 000 000 - * 4 100 001 - * 2 010 010 - * 6 110 011 - * 1 001 100 - * 5 101 101 - * 3 011 110 - * 7 111 111 - * - * This shows the nodes in order of non-reversed bits, linked by - * reversed-bit order. - * - * order bits reverse - * 0 0 000 000 - * 1 | 1 001 100 <- - * 2 | | 2 010 010 <- | - * | | | 3 011 110 | <- | - * 3 -> | | | 4 100 001 | | - * -> | | 5 101 101 | - * -> | 6 110 011 - * -> 7 111 111 - */ - -#define _LGPL_SOURCE -#include -#include -#include -#include -#include -#include - -#include "config.h" -#include -#include -#include -#include -#include -#include -#include - -#include "rculfhash.h" - -#ifdef DEBUG -#define dbg_printf(fmt, args...) printf("[debug rculfhash] " fmt, ## args) -#else -#define dbg_printf(fmt, args...) -#endif - -/* - * Split-counters lazily update the global counter each 1024 - * addition/removal. It automatically keeps track of resize required. - * We use the bucket length as indicator for need to expand for small - * tables and machines lacking per-cpu data suppport. 
- */ -#define COUNT_COMMIT_ORDER 10 -#define DEFAULT_SPLIT_COUNT_MASK 0xFUL -#define CHAIN_LEN_TARGET 1 -#define CHAIN_LEN_RESIZE_THRESHOLD 3 - -/* - * Define the minimum table size. - */ -#define MIN_TABLE_SIZE 1 - -#if (CAA_BITS_PER_LONG == 32) -#define MAX_TABLE_ORDER 32 -#else -#define MAX_TABLE_ORDER 64 -#endif - -/* - * Minimum number of dummy nodes to touch per thread to parallelize grow/shrink. - */ -#define MIN_PARTITION_PER_THREAD_ORDER 12 -#define MIN_PARTITION_PER_THREAD (1UL << MIN_PARTITION_PER_THREAD_ORDER) - -#ifndef min -#define min(a, b) ((a) < (b) ? (a) : (b)) -#endif - -#ifndef max -#define max(a, b) ((a) > (b) ? (a) : (b)) -#endif - -/* - * The removed flag needs to be updated atomically with the pointer. - * It indicates that no node must attach to the node scheduled for - * removal, and that node garbage collection must be performed. - * The dummy flag does not require to be updated atomically with the - * pointer, but it is added as a pointer low bit flag to save space. - */ -#define REMOVED_FLAG (1UL << 0) -#define DUMMY_FLAG (1UL << 1) -#define FLAGS_MASK ((1UL << 2) - 1) - -/* Value of the end pointer. Should not interact with flags. */ -#define END_VALUE NULL - -/* - * ht_items_count: Split-counters counting the number of node addition - * and removal in the table. Only used if the CDS_LFHT_ACCOUNTING flag - * is set at hash table creation. - * - * These are free-running counters, never reset to zero. They count the - * number of add/remove, and trigger every (1 << COUNT_COMMIT_ORDER) - * operations to update the global counter. We choose a power-of-2 value - * for the trigger to deal with 32 or 64-bit overflow of the counter. - */ -struct ht_items_count { - unsigned long add, del; -} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); - -/* - * rcu_level: Contains the per order-index-level dummy node table. The - * size of each dummy node table is half the number of hashes contained - * in this order (except for order 0). The minimum allocation size - * parameter allows combining the dummy node arrays of the lowermost - * levels to improve cache locality for small index orders. - */ -struct rcu_level { - /* Note: manually update allocation length when adding a field */ - struct _cds_lfht_node nodes[0]; -}; - -/* - * rcu_table: Contains the size and desired new size if a resize - * operation is in progress, as well as the statically-sized array of - * rcu_level pointers. - */ -struct rcu_table { - unsigned long size; /* always a power of 2, shared (RCU) */ - unsigned long resize_target; - int resize_initiated; - struct rcu_level *tbl[MAX_TABLE_ORDER]; -}; - -/* - * cds_lfht: Top-level data structure representing a lock-free hash - * table. Defined in the implementation file to make it be an opaque - * cookie to users. - */ -struct cds_lfht { - struct rcu_table t; - cds_lfht_hash_fct hash_fct; - cds_lfht_compare_fct compare_fct; - unsigned long min_alloc_order; - unsigned long min_alloc_size; - unsigned long hash_seed; - int flags; - /* - * We need to put the work threads offline (QSBR) when taking this - * mutex, because we use synchronize_rcu within this mutex critical - * section, which waits on read-side critical sections, and could - * therefore cause grace-period deadlock if we hold off RCU G.P. - * completion. 
- */ - pthread_mutex_t resize_mutex; /* resize mutex: add/del mutex */ - unsigned int in_progress_resize, in_progress_destroy; - void (*cds_lfht_call_rcu)(struct rcu_head *head, - void (*func)(struct rcu_head *head)); - void (*cds_lfht_synchronize_rcu)(void); - void (*cds_lfht_rcu_read_lock)(void); - void (*cds_lfht_rcu_read_unlock)(void); - void (*cds_lfht_rcu_thread_offline)(void); - void (*cds_lfht_rcu_thread_online)(void); - void (*cds_lfht_rcu_register_thread)(void); - void (*cds_lfht_rcu_unregister_thread)(void); - pthread_attr_t *resize_attr; /* Resize threads attributes */ - long count; /* global approximate item count */ - struct ht_items_count *split_count; /* split item count */ -}; - -/* - * rcu_resize_work: Contains arguments passed to RCU worker thread - * responsible for performing lazy resize. - */ -struct rcu_resize_work { - struct rcu_head head; - struct cds_lfht *ht; -}; - -/* - * partition_resize_work: Contains arguments passed to worker threads - * executing the hash table resize on partitions of the hash table - * assigned to each processor's worker thread. - */ -struct partition_resize_work { - pthread_t thread_id; - struct cds_lfht *ht; - unsigned long i, start, len; - void (*fct)(struct cds_lfht *ht, unsigned long i, - unsigned long start, unsigned long len); -}; - -static -void _cds_lfht_add(struct cds_lfht *ht, - unsigned long size, - struct cds_lfht_node *node, - struct cds_lfht_iter *unique_ret, - int dummy); - -/* - * Algorithm to reverse bits in a word by lookup table, extended to - * 64-bit words. - * Source: - * http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable - * Originally from Public Domain. - */ - -static const uint8_t BitReverseTable256[256] = -{ -#define R2(n) (n), (n) + 2*64, (n) + 1*64, (n) + 3*64 -#define R4(n) R2(n), R2((n) + 2*16), R2((n) + 1*16), R2((n) + 3*16) -#define R6(n) R4(n), R4((n) + 2*4 ), R4((n) + 1*4 ), R4((n) + 3*4 ) - R6(0), R6(2), R6(1), R6(3) -}; -#undef R2 -#undef R4 -#undef R6 - -static -uint8_t bit_reverse_u8(uint8_t v) -{ - return BitReverseTable256[v]; -} - -static __attribute__((unused)) -uint32_t bit_reverse_u32(uint32_t v) -{ - return ((uint32_t) bit_reverse_u8(v) << 24) | - ((uint32_t) bit_reverse_u8(v >> 8) << 16) | - ((uint32_t) bit_reverse_u8(v >> 16) << 8) | - ((uint32_t) bit_reverse_u8(v >> 24)); -} - -static __attribute__((unused)) -uint64_t bit_reverse_u64(uint64_t v) -{ - return ((uint64_t) bit_reverse_u8(v) << 56) | - ((uint64_t) bit_reverse_u8(v >> 8) << 48) | - ((uint64_t) bit_reverse_u8(v >> 16) << 40) | - ((uint64_t) bit_reverse_u8(v >> 24) << 32) | - ((uint64_t) bit_reverse_u8(v >> 32) << 24) | - ((uint64_t) bit_reverse_u8(v >> 40) << 16) | - ((uint64_t) bit_reverse_u8(v >> 48) << 8) | - ((uint64_t) bit_reverse_u8(v >> 56)); -} - -static -unsigned long bit_reverse_ulong(unsigned long v) -{ -#if (CAA_BITS_PER_LONG == 32) - return bit_reverse_u32(v); -#else - return bit_reverse_u64(v); -#endif -} - -/* - * fls: returns the position of the most significant bit. - * Returns 0 if no bit is set, else returns the position of the most - * significant bit (from 1 to 32 on 32-bit, from 1 to 64 on 64-bit). 
- */ -#if defined(__i386) || defined(__x86_64) -static inline -unsigned int fls_u32(uint32_t x) -{ - int r; - - asm("bsrl %1,%0\n\t" - "jnz 1f\n\t" - "movl $-1,%0\n\t" - "1:\n\t" - : "=r" (r) : "rm" (x)); - return r + 1; -} -#define HAS_FLS_U32 -#endif - -#if defined(__x86_64) -static inline -unsigned int fls_u64(uint64_t x) -{ - long r; - - asm("bsrq %1,%0\n\t" - "jnz 1f\n\t" - "movq $-1,%0\n\t" - "1:\n\t" - : "=r" (r) : "rm" (x)); - return r + 1; -} -#define HAS_FLS_U64 -#endif - -#ifndef HAS_FLS_U64 -static __attribute__((unused)) -unsigned int fls_u64(uint64_t x) -{ - unsigned int r = 64; - - if (!x) - return 0; - - if (!(x & 0xFFFFFFFF00000000ULL)) { - x <<= 32; - r -= 32; - } - if (!(x & 0xFFFF000000000000ULL)) { - x <<= 16; - r -= 16; - } - if (!(x & 0xFF00000000000000ULL)) { - x <<= 8; - r -= 8; - } - if (!(x & 0xF000000000000000ULL)) { - x <<= 4; - r -= 4; - } - if (!(x & 0xC000000000000000ULL)) { - x <<= 2; - r -= 2; - } - if (!(x & 0x8000000000000000ULL)) { - x <<= 1; - r -= 1; - } - return r; -} -#endif - -#ifndef HAS_FLS_U32 -static __attribute__((unused)) -unsigned int fls_u32(uint32_t x) -{ - unsigned int r = 32; - - if (!x) - return 0; - if (!(x & 0xFFFF0000U)) { - x <<= 16; - r -= 16; - } - if (!(x & 0xFF000000U)) { - x <<= 8; - r -= 8; - } - if (!(x & 0xF0000000U)) { - x <<= 4; - r -= 4; - } - if (!(x & 0xC0000000U)) { - x <<= 2; - r -= 2; - } - if (!(x & 0x80000000U)) { - x <<= 1; - r -= 1; - } - return r; -} -#endif - -unsigned int fls_ulong(unsigned long x) -{ -#if (CAA_BITS_PER_LONG == 32) - return fls_u32(x); -#else - return fls_u64(x); -#endif -} - -/* - * Return the minimum order for which x <= (1UL << order). - * Return -1 if x is 0. - */ -int get_count_order_u32(uint32_t x) -{ - if (!x) - return -1; - - return fls_u32(x - 1); -} - -/* - * Return the minimum order for which x <= (1UL << order). - * Return -1 if x is 0. - */ -int get_count_order_ulong(unsigned long x) -{ - if (!x) - return -1; - - return fls_ulong(x - 1); -} - -#ifdef POISON_FREE -#define poison_free(ptr) \ - do { \ - if (ptr) { \ - memset(ptr, 0x42, sizeof(*(ptr))); \ - free(ptr); \ - } \ - } while (0) -#else -#define poison_free(ptr) free(ptr) -#endif - -static -void cds_lfht_resize_lazy(struct cds_lfht *ht, unsigned long size, int growth); - -static -void cds_lfht_resize_lazy_count(struct cds_lfht *ht, unsigned long size, - unsigned long count); - -static long nr_cpus_mask = -1; -static long split_count_mask = -1; - -#if defined(HAVE_SYSCONF) -static void ht_init_nr_cpus_mask(void) -{ - long maxcpus; - - maxcpus = sysconf(_SC_NPROCESSORS_CONF); - if (maxcpus <= 0) { - nr_cpus_mask = -2; - return; - } - /* - * round up number of CPUs to next power of two, so we - * can use & for modulo. 
- */ - maxcpus = 1UL << get_count_order_ulong(maxcpus); - nr_cpus_mask = maxcpus - 1; -} -#else /* #if defined(HAVE_SYSCONF) */ -static void ht_init_nr_cpus_mask(void) -{ - nr_cpus_mask = -2; -} -#endif /* #else #if defined(HAVE_SYSCONF) */ - -static -void alloc_split_items_count(struct cds_lfht *ht) -{ - struct ht_items_count *count; - - if (nr_cpus_mask == -1) { - ht_init_nr_cpus_mask(); - if (nr_cpus_mask < 0) - split_count_mask = DEFAULT_SPLIT_COUNT_MASK; - else - split_count_mask = nr_cpus_mask; - } - - assert(split_count_mask >= 0); - - if (ht->flags & CDS_LFHT_ACCOUNTING) { - ht->split_count = calloc(split_count_mask + 1, sizeof(*count)); - assert(ht->split_count); - } else { - ht->split_count = NULL; - } -} - -static -void free_split_items_count(struct cds_lfht *ht) -{ - poison_free(ht->split_count); -} - -#if defined(HAVE_SCHED_GETCPU) -static -int ht_get_split_count_index(unsigned long hash) -{ - int cpu; - - assert(split_count_mask >= 0); - cpu = sched_getcpu(); - if (caa_unlikely(cpu < 0)) - return hash & split_count_mask; - else - return cpu & split_count_mask; -} -#else /* #if defined(HAVE_SCHED_GETCPU) */ -static -int ht_get_split_count_index(unsigned long hash) -{ - return hash & split_count_mask; -} -#endif /* #else #if defined(HAVE_SCHED_GETCPU) */ - -static -void ht_count_add(struct cds_lfht *ht, unsigned long size, unsigned long hash) -{ - unsigned long split_count; - int index; - - if (caa_unlikely(!ht->split_count)) - return; - index = ht_get_split_count_index(hash); - split_count = uatomic_add_return(&ht->split_count[index].add, 1); - if (caa_unlikely(!(split_count & ((1UL << COUNT_COMMIT_ORDER) - 1)))) { - long count; - - dbg_printf("add split count %lu\n", split_count); - count = uatomic_add_return(&ht->count, - 1UL << COUNT_COMMIT_ORDER); - /* If power of 2 */ - if (!(count & (count - 1))) { - if ((count >> CHAIN_LEN_RESIZE_THRESHOLD) < size) - return; - dbg_printf("add set global %ld\n", count); - cds_lfht_resize_lazy_count(ht, size, - count >> (CHAIN_LEN_TARGET - 1)); - } - } -} - -static -void ht_count_del(struct cds_lfht *ht, unsigned long size, unsigned long hash) -{ - unsigned long split_count; - int index; - - if (caa_unlikely(!ht->split_count)) - return; - index = ht_get_split_count_index(hash); - split_count = uatomic_add_return(&ht->split_count[index].del, 1); - if (caa_unlikely(!(split_count & ((1UL << COUNT_COMMIT_ORDER) - 1)))) { - long count; - - dbg_printf("del split count %lu\n", split_count); - count = uatomic_add_return(&ht->count, - -(1UL << COUNT_COMMIT_ORDER)); - /* If power of 2 */ - if (!(count & (count - 1))) { - if ((count >> CHAIN_LEN_RESIZE_THRESHOLD) >= size) - return; - dbg_printf("del set global %ld\n", count); - /* - * Don't shrink table if the number of nodes is below a - * certain threshold. - */ - if (count < (1UL << COUNT_COMMIT_ORDER) * (split_count_mask + 1)) - return; - cds_lfht_resize_lazy_count(ht, size, - count >> (CHAIN_LEN_TARGET - 1)); - } - } -} - -static -void check_resize(struct cds_lfht *ht, unsigned long size, uint32_t chain_len) -{ - unsigned long count; - - if (!(ht->flags & CDS_LFHT_AUTO_RESIZE)) - return; - count = uatomic_read(&ht->count); - /* - * Use bucket-local length for small table expand and for - * environments lacking per-cpu data support. 
- */ - if (count >= (1UL << COUNT_COMMIT_ORDER)) - return; - if (chain_len > 100) - dbg_printf("WARNING: large chain length: %u.\n", - chain_len); - if (chain_len >= CHAIN_LEN_RESIZE_THRESHOLD) - cds_lfht_resize_lazy(ht, size, - get_count_order_u32(chain_len - (CHAIN_LEN_TARGET - 1))); -} - -static -struct cds_lfht_node *clear_flag(struct cds_lfht_node *node) -{ - return (struct cds_lfht_node *) (((unsigned long) node) & ~FLAGS_MASK); -} - -static -int is_removed(struct cds_lfht_node *node) -{ - return ((unsigned long) node) & REMOVED_FLAG; -} - -static -struct cds_lfht_node *flag_removed(struct cds_lfht_node *node) -{ - return (struct cds_lfht_node *) (((unsigned long) node) | REMOVED_FLAG); -} - -static -int is_dummy(struct cds_lfht_node *node) -{ - return ((unsigned long) node) & DUMMY_FLAG; -} - -static -struct cds_lfht_node *flag_dummy(struct cds_lfht_node *node) -{ - return (struct cds_lfht_node *) (((unsigned long) node) | DUMMY_FLAG); -} - -static -struct cds_lfht_node *get_end(void) -{ - return (struct cds_lfht_node *) END_VALUE; -} - -static -int is_end(struct cds_lfht_node *node) -{ - return clear_flag(node) == (struct cds_lfht_node *) END_VALUE; -} - -static -unsigned long _uatomic_max(unsigned long *ptr, unsigned long v) -{ - unsigned long old1, old2; - - old1 = uatomic_read(ptr); - do { - old2 = old1; - if (old2 >= v) - return old2; - } while ((old1 = uatomic_cmpxchg(ptr, old2, v)) != old2); - return v; -} - -static -struct _cds_lfht_node *lookup_bucket(struct cds_lfht *ht, unsigned long size, - unsigned long hash) -{ - unsigned long index, order; - - assert(size > 0); - index = hash & (size - 1); - - if (index < ht->min_alloc_size) { - dbg_printf("lookup hash %lu index %lu order 0 aridx 0\n", - hash, index); - return &ht->t.tbl[0]->nodes[index]; - } - /* - * equivalent to get_count_order_ulong(index + 1), but optimizes - * away the non-existing 0 special-case for - * get_count_order_ulong. - */ - order = fls_ulong(index); - dbg_printf("lookup hash %lu index %lu order %lu aridx %lu\n", - hash, index, order, index & ((1UL << (order - 1)) - 1)); - return &ht->t.tbl[order]->nodes[index & ((1UL << (order - 1)) - 1)]; -} - -/* - * Remove all logically deleted nodes from a bucket up to a certain node key. - */ -static -void _cds_lfht_gc_bucket(struct cds_lfht_node *dummy, struct cds_lfht_node *node) -{ - struct cds_lfht_node *iter_prev, *iter, *next, *new_next; - - assert(!is_dummy(dummy)); - assert(!is_removed(dummy)); - assert(!is_dummy(node)); - assert(!is_removed(node)); - for (;;) { - iter_prev = dummy; - /* We can always skip the dummy node initially */ - iter = rcu_dereference(iter_prev->p.next); - assert(!is_removed(iter)); - assert(iter_prev->p.reverse_hash <= node->p.reverse_hash); - /* - * We should never be called with dummy (start of chain) - * and logically removed node (end of path compression - * marker) being the actual same node. This would be a - * bug in the algorithm implementation. 
- */ - assert(dummy != node); - for (;;) { - if (caa_unlikely(is_end(iter))) - return; - if (caa_likely(clear_flag(iter)->p.reverse_hash > node->p.reverse_hash)) - return; - next = rcu_dereference(clear_flag(iter)->p.next); - if (caa_likely(is_removed(next))) - break; - iter_prev = clear_flag(iter); - iter = next; - } - assert(!is_removed(iter)); - if (is_dummy(iter)) - new_next = flag_dummy(clear_flag(next)); - else - new_next = clear_flag(next); - (void) uatomic_cmpxchg(&iter_prev->p.next, iter, new_next); - } - return; -} - -static -int _cds_lfht_replace(struct cds_lfht *ht, unsigned long size, - struct cds_lfht_node *old_node, - struct cds_lfht_node *old_next, - struct cds_lfht_node *new_node) -{ - struct cds_lfht_node *dummy, *ret_next; - struct _cds_lfht_node *lookup; - - if (!old_node) /* Return -ENOENT if asked to replace NULL node */ - return -ENOENT; - - assert(!is_removed(old_node)); - assert(!is_dummy(old_node)); - assert(!is_removed(new_node)); - assert(!is_dummy(new_node)); - assert(new_node != old_node); - for (;;) { - /* Insert after node to be replaced */ - if (is_removed(old_next)) { - /* - * Too late, the old node has been removed under us - * between lookup and replace. Fail. - */ - return -ENOENT; - } - assert(!is_dummy(old_next)); - assert(new_node != clear_flag(old_next)); - new_node->p.next = clear_flag(old_next); - /* - * Here is the whole trick for lock-free replace: we add - * the replacement node _after_ the node we want to - * replace by atomically setting its next pointer at the - * same time we set its removal flag. Given that - * the lookups/get next use an iterator aware of the - * next pointer, they will either skip the old node due - * to the removal flag and see the new node, or use - * the old node, but will not see the new one. - */ - ret_next = uatomic_cmpxchg(&old_node->p.next, - old_next, flag_removed(new_node)); - if (ret_next == old_next) - break; /* We performed the replacement. */ - old_next = ret_next; - } - - /* - * Ensure that the old node is not visible to readers anymore: - * lookup for the node, and remove it (along with any other - * logically removed node) if found. - */ - lookup = lookup_bucket(ht, size, bit_reverse_ulong(old_node->p.reverse_hash)); - dummy = (struct cds_lfht_node *) lookup; - _cds_lfht_gc_bucket(dummy, new_node); - - assert(is_removed(rcu_dereference(old_node->p.next))); - return 0; -} - -/* - * A non-NULL unique_ret pointer uses the "add unique" (or uniquify) add - * mode. A NULL unique_ret allows creation of duplicate keys. - */ -static -void _cds_lfht_add(struct cds_lfht *ht, - unsigned long size, - struct cds_lfht_node *node, - struct cds_lfht_iter *unique_ret, - int dummy) -{ - struct cds_lfht_node *iter_prev, *iter, *next, *new_node, *new_next, - *return_node; - struct _cds_lfht_node *lookup; - - assert(!is_dummy(node)); - assert(!is_removed(node)); - lookup = lookup_bucket(ht, size, bit_reverse_ulong(node->p.reverse_hash)); - for (;;) { - uint32_t chain_len = 0; - - /* - * iter_prev points to the non-removed node prior to the - * insert location. 
- */ - iter_prev = (struct cds_lfht_node *) lookup; - /* We can always skip the dummy node initially */ - iter = rcu_dereference(iter_prev->p.next); - assert(iter_prev->p.reverse_hash <= node->p.reverse_hash); - for (;;) { - if (caa_unlikely(is_end(iter))) - goto insert; - if (caa_likely(clear_flag(iter)->p.reverse_hash > node->p.reverse_hash)) - goto insert; - - /* dummy node is the first node of the identical-hash-value chain */ - if (dummy && clear_flag(iter)->p.reverse_hash == node->p.reverse_hash) - goto insert; - - next = rcu_dereference(clear_flag(iter)->p.next); - if (caa_unlikely(is_removed(next))) - goto gc_node; - - /* uniquely add */ - if (unique_ret - && !is_dummy(next) - && clear_flag(iter)->p.reverse_hash == node->p.reverse_hash) { - struct cds_lfht_iter d_iter = { .node = node, .next = iter, }; - - /* - * uniquely adding inserts the node as the first - * node of the identical-hash-value node chain. - * - * This semantic ensures no duplicated keys - * should ever be observable in the table - * (including observe one node by one node - * by forward iterations) - */ - cds_lfht_next_duplicate(ht, &d_iter); - if (!d_iter.node) - goto insert; - - *unique_ret = d_iter; - return; - } - - /* Only account for identical reverse hash once */ - if (iter_prev->p.reverse_hash != clear_flag(iter)->p.reverse_hash - && !is_dummy(next)) - check_resize(ht, size, ++chain_len); - iter_prev = clear_flag(iter); - iter = next; - } - - insert: - assert(node != clear_flag(iter)); - assert(!is_removed(iter_prev)); - assert(!is_removed(iter)); - assert(iter_prev != node); - if (!dummy) - node->p.next = clear_flag(iter); - else - node->p.next = flag_dummy(clear_flag(iter)); - if (is_dummy(iter)) - new_node = flag_dummy(node); - else - new_node = node; - if (uatomic_cmpxchg(&iter_prev->p.next, iter, - new_node) != iter) { - continue; /* retry */ - } else { - return_node = node; - goto end; - } - - gc_node: - assert(!is_removed(iter)); - if (is_dummy(iter)) - new_next = flag_dummy(clear_flag(next)); - else - new_next = clear_flag(next); - (void) uatomic_cmpxchg(&iter_prev->p.next, iter, new_next); - /* retry */ - } -end: - if (unique_ret) { - unique_ret->node = return_node; - /* unique_ret->next left unset, never used. */ - } -} - -static -int _cds_lfht_del(struct cds_lfht *ht, unsigned long size, - struct cds_lfht_node *node, - int dummy_removal) -{ - struct cds_lfht_node *dummy, *next, *old; - struct _cds_lfht_node *lookup; - - if (!node) /* Return -ENOENT if asked to delete NULL node */ - return -ENOENT; - - /* logically delete the node */ - assert(!is_dummy(node)); - assert(!is_removed(node)); - old = rcu_dereference(node->p.next); - do { - struct cds_lfht_node *new_next; - - next = old; - if (caa_unlikely(is_removed(next))) - return -ENOENT; - if (dummy_removal) - assert(is_dummy(next)); - else - assert(!is_dummy(next)); - new_next = flag_removed(next); - old = uatomic_cmpxchg(&node->p.next, next, new_next); - } while (old != next); - /* We performed the (logical) deletion. */ - - /* - * Ensure that the node is not visible to readers anymore: lookup for - * the node, and remove it (along with any other logically removed node) - * if found. 
- */ - lookup = lookup_bucket(ht, size, bit_reverse_ulong(node->p.reverse_hash)); - dummy = (struct cds_lfht_node *) lookup; - _cds_lfht_gc_bucket(dummy, node); - - assert(is_removed(rcu_dereference(node->p.next))); - return 0; -} - -static -void *partition_resize_thread(void *arg) -{ - struct partition_resize_work *work = arg; - - work->ht->cds_lfht_rcu_register_thread(); - work->fct(work->ht, work->i, work->start, work->len); - work->ht->cds_lfht_rcu_unregister_thread(); - return NULL; -} - -static -void partition_resize_helper(struct cds_lfht *ht, unsigned long i, - unsigned long len, - void (*fct)(struct cds_lfht *ht, unsigned long i, - unsigned long start, unsigned long len)) -{ - unsigned long partition_len; - struct partition_resize_work *work; - int thread, ret; - unsigned long nr_threads; - - /* - * Note: nr_cpus_mask + 1 is always power of 2. - * We spawn just the number of threads we need to satisfy the minimum - * partition size, up to the number of CPUs in the system. - */ - if (nr_cpus_mask > 0) { - nr_threads = min(nr_cpus_mask + 1, - len >> MIN_PARTITION_PER_THREAD_ORDER); - } else { - nr_threads = 1; - } - partition_len = len >> get_count_order_ulong(nr_threads); - work = calloc(nr_threads, sizeof(*work)); - assert(work); - for (thread = 0; thread < nr_threads; thread++) { - work[thread].ht = ht; - work[thread].i = i; - work[thread].len = partition_len; - work[thread].start = thread * partition_len; - work[thread].fct = fct; - ret = pthread_create(&(work[thread].thread_id), ht->resize_attr, - partition_resize_thread, &work[thread]); - assert(!ret); - } - for (thread = 0; thread < nr_threads; thread++) { - ret = pthread_join(work[thread].thread_id, NULL); - assert(!ret); - } - free(work); -} - -/* - * Holding RCU read lock to protect _cds_lfht_add against memory - * reclaim that could be performed by other call_rcu worker threads (ABA - * problem). - * - * When we reach a certain length, we can split this population phase over - * many worker threads, based on the number of CPUs available in the system. - * This should therefore take care of not having the expand lagging behind too - * many concurrent insertion threads by using the scheduler's ability to - * schedule dummy node population fairly with insertions. 
- */ -static -void init_table_populate_partition(struct cds_lfht *ht, unsigned long i, - unsigned long start, unsigned long len) -{ - unsigned long j; - - assert(i > ht->min_alloc_order); - ht->cds_lfht_rcu_read_lock(); - for (j = start; j < start + len; j++) { - struct cds_lfht_node *new_node = - (struct cds_lfht_node *) &ht->t.tbl[i]->nodes[j]; - - dbg_printf("init populate: i %lu j %lu hash %lu\n", - i, j, (1UL << (i - 1)) + j); - new_node->p.reverse_hash = - bit_reverse_ulong((1UL << (i - 1)) + j); - _cds_lfht_add(ht, 1UL << (i - 1), - new_node, NULL, 1); - } - ht->cds_lfht_rcu_read_unlock(); -} - -static -void init_table_populate(struct cds_lfht *ht, unsigned long i, - unsigned long len) -{ - assert(nr_cpus_mask != -1); - if (nr_cpus_mask < 0 || len < 2 * MIN_PARTITION_PER_THREAD) { - ht->cds_lfht_rcu_thread_online(); - init_table_populate_partition(ht, i, 0, len); - ht->cds_lfht_rcu_thread_offline(); - return; - } - partition_resize_helper(ht, i, len, init_table_populate_partition); -} - -static -void init_table(struct cds_lfht *ht, - unsigned long first_order, unsigned long last_order) -{ - unsigned long i; - - dbg_printf("init table: first_order %lu last_order %lu\n", - first_order, last_order); - assert(first_order > ht->min_alloc_order); - for (i = first_order; i <= last_order; i++) { - unsigned long len; - - len = 1UL << (i - 1); - dbg_printf("init order %lu len: %lu\n", i, len); - - /* Stop expand if the resize target changes under us */ - if (CMM_LOAD_SHARED(ht->t.resize_target) < (1UL << i)) - break; - - ht->t.tbl[i] = calloc(1, len * sizeof(struct _cds_lfht_node)); - assert(ht->t.tbl[i]); - - /* - * Set all dummy nodes reverse hash values for a level and - * link all dummy nodes into the table. - */ - init_table_populate(ht, i, len); - - /* - * Update table size. - */ - cmm_smp_wmb(); /* populate data before RCU size */ - CMM_STORE_SHARED(ht->t.size, 1UL << i); - - dbg_printf("init new size: %lu\n", 1UL << i); - if (CMM_LOAD_SHARED(ht->in_progress_destroy)) - break; - } -} - -/* - * Holding RCU read lock to protect _cds_lfht_remove against memory - * reclaim that could be performed by other call_rcu worker threads (ABA - * problem). - * For a single level, we logically remove and garbage collect each node. - * - * As a design choice, we perform logical removal and garbage collection on a - * node-per-node basis to simplify this algorithm. We also assume keeping good - * cache locality of the operation would overweight possible performance gain - * that could be achieved by batching garbage collection for multiple levels. - * However, this would have to be justified by benchmarks. - * - * Concurrent removal and add operations are helping us perform garbage - * collection of logically removed nodes. We guarantee that all logically - * removed nodes have been garbage-collected (unlinked) before call_rcu is - * invoked to free a hole level of dummy nodes (after a grace period). - * - * Logical removal and garbage collection can therefore be done in batch or on a - * node-per-node basis, as long as the guarantee above holds. - * - * When we reach a certain length, we can split this removal over many worker - * threads, based on the number of CPUs available in the system. This should - * take care of not letting resize process lag behind too many concurrent - * updater threads actively inserting into the hash table. 
- */ -static -void remove_table_partition(struct cds_lfht *ht, unsigned long i, - unsigned long start, unsigned long len) -{ - unsigned long j; - - assert(i > ht->min_alloc_order); - ht->cds_lfht_rcu_read_lock(); - for (j = start; j < start + len; j++) { - struct cds_lfht_node *fini_node = - (struct cds_lfht_node *) &ht->t.tbl[i]->nodes[j]; - - dbg_printf("remove entry: i %lu j %lu hash %lu\n", - i, j, (1UL << (i - 1)) + j); - fini_node->p.reverse_hash = - bit_reverse_ulong((1UL << (i - 1)) + j); - (void) _cds_lfht_del(ht, 1UL << (i - 1), fini_node, 1); - } - ht->cds_lfht_rcu_read_unlock(); -} - -static -void remove_table(struct cds_lfht *ht, unsigned long i, unsigned long len) -{ - - assert(nr_cpus_mask != -1); - if (nr_cpus_mask < 0 || len < 2 * MIN_PARTITION_PER_THREAD) { - ht->cds_lfht_rcu_thread_online(); - remove_table_partition(ht, i, 0, len); - ht->cds_lfht_rcu_thread_offline(); - return; - } - partition_resize_helper(ht, i, len, remove_table_partition); -} - -static -void fini_table(struct cds_lfht *ht, - unsigned long first_order, unsigned long last_order) -{ - long i; - void *free_by_rcu = NULL; - - dbg_printf("fini table: first_order %lu last_order %lu\n", - first_order, last_order); - assert(first_order > ht->min_alloc_order); - for (i = last_order; i >= first_order; i--) { - unsigned long len; - - len = 1UL << (i - 1); - dbg_printf("fini order %lu len: %lu\n", i, len); - - /* Stop shrink if the resize target changes under us */ - if (CMM_LOAD_SHARED(ht->t.resize_target) > (1UL << (i - 1))) - break; - - cmm_smp_wmb(); /* populate data before RCU size */ - CMM_STORE_SHARED(ht->t.size, 1UL << (i - 1)); - - /* - * We need to wait for all add operations to reach Q.S. (and - * thus use the new table for lookups) before we can start - * releasing the old dummy nodes. Otherwise their lookup will - * return a logically removed node as insert position. - */ - ht->cds_lfht_synchronize_rcu(); - if (free_by_rcu) - free(free_by_rcu); - - /* - * Set "removed" flag in dummy nodes about to be removed. - * Unlink all now-logically-removed dummy node pointers. - * Concurrent add/remove operation are helping us doing - * the gc. 
- */ - remove_table(ht, i, len); - - free_by_rcu = ht->t.tbl[i]; - - dbg_printf("fini new size: %lu\n", 1UL << i); - if (CMM_LOAD_SHARED(ht->in_progress_destroy)) - break; - } - - if (free_by_rcu) { - ht->cds_lfht_synchronize_rcu(); - free(free_by_rcu); - } -} - -static -void cds_lfht_create_dummy(struct cds_lfht *ht, unsigned long size) -{ - struct _cds_lfht_node *prev, *node; - unsigned long order, len, i, j; - - ht->t.tbl[0] = calloc(1, ht->min_alloc_size * sizeof(struct _cds_lfht_node)); - assert(ht->t.tbl[0]); - - dbg_printf("create dummy: order %lu index %lu hash %lu\n", 0, 0, 0); - ht->t.tbl[0]->nodes[0].next = flag_dummy(get_end()); - ht->t.tbl[0]->nodes[0].reverse_hash = 0; - - for (order = 1; order < get_count_order_ulong(size) + 1; order++) { - len = 1UL << (order - 1); - if (order <= ht->min_alloc_order) { - ht->t.tbl[order] = (struct rcu_level *) (ht->t.tbl[0]->nodes + len); - } else { - ht->t.tbl[order] = calloc(1, len * sizeof(struct _cds_lfht_node)); - assert(ht->t.tbl[order]); - } - - i = 0; - prev = ht->t.tbl[i]->nodes; - for (j = 0; j < len; j++) { - if (j & (j - 1)) { /* Between power of 2 */ - prev++; - } else if (j) { /* At each power of 2 */ - i++; - prev = ht->t.tbl[i]->nodes; - } - - node = &ht->t.tbl[order]->nodes[j]; - dbg_printf("create dummy: order %lu index %lu hash %lu\n", - order, j, j + len); - node->next = prev->next; - assert(is_dummy(node->next)); - node->reverse_hash = bit_reverse_ulong(j + len); - prev->next = flag_dummy((struct cds_lfht_node *)node); - } - } -} - -struct cds_lfht *_cds_lfht_new(cds_lfht_hash_fct hash_fct, - cds_lfht_compare_fct compare_fct, - unsigned long hash_seed, - unsigned long init_size, - unsigned long min_alloc_size, - int flags, - void (*cds_lfht_call_rcu)(struct rcu_head *head, - void (*func)(struct rcu_head *head)), - void (*cds_lfht_synchronize_rcu)(void), - void (*cds_lfht_rcu_read_lock)(void), - void (*cds_lfht_rcu_read_unlock)(void), - void (*cds_lfht_rcu_thread_offline)(void), - void (*cds_lfht_rcu_thread_online)(void), - void (*cds_lfht_rcu_register_thread)(void), - void (*cds_lfht_rcu_unregister_thread)(void), - pthread_attr_t *attr) -{ - struct cds_lfht *ht; - unsigned long order; - - /* min_alloc_size must be power of two */ - if (!min_alloc_size || (min_alloc_size & (min_alloc_size - 1))) - return NULL; - /* init_size must be power of two */ - if (!init_size || (init_size & (init_size - 1))) - return NULL; - min_alloc_size = max(min_alloc_size, MIN_TABLE_SIZE); - init_size = max(init_size, min_alloc_size); - ht = calloc(1, sizeof(struct cds_lfht)); - assert(ht); - ht->flags = flags; - ht->hash_fct = hash_fct; - ht->compare_fct = compare_fct; - ht->hash_seed = hash_seed; - ht->cds_lfht_call_rcu = cds_lfht_call_rcu; - ht->cds_lfht_synchronize_rcu = cds_lfht_synchronize_rcu; - ht->cds_lfht_rcu_read_lock = cds_lfht_rcu_read_lock; - ht->cds_lfht_rcu_read_unlock = cds_lfht_rcu_read_unlock; - ht->cds_lfht_rcu_thread_offline = cds_lfht_rcu_thread_offline; - ht->cds_lfht_rcu_thread_online = cds_lfht_rcu_thread_online; - ht->cds_lfht_rcu_register_thread = cds_lfht_rcu_register_thread; - ht->cds_lfht_rcu_unregister_thread = cds_lfht_rcu_unregister_thread; - ht->resize_attr = attr; - alloc_split_items_count(ht); - /* this mutex should not nest in read-side C.S. 
*/ - pthread_mutex_init(&ht->resize_mutex, NULL); - order = get_count_order_ulong(init_size); - ht->t.resize_target = 1UL << order; - ht->min_alloc_size = min_alloc_size; - ht->min_alloc_order = get_count_order_ulong(min_alloc_size); - cds_lfht_create_dummy(ht, 1UL << order); - ht->t.size = 1UL << order; - return ht; -} - -void cds_lfht_lookup(struct cds_lfht *ht, void *key, size_t key_len, - struct cds_lfht_iter *iter) -{ - struct cds_lfht_node *node, *next, *dummy_node; - struct _cds_lfht_node *lookup; - unsigned long hash, reverse_hash, size; - - hash = ht->hash_fct(key, key_len, ht->hash_seed); - reverse_hash = bit_reverse_ulong(hash); - - size = rcu_dereference(ht->t.size); - lookup = lookup_bucket(ht, size, hash); - dummy_node = (struct cds_lfht_node *) lookup; - /* We can always skip the dummy node initially */ - node = rcu_dereference(dummy_node->p.next); - node = clear_flag(node); - for (;;) { - if (caa_unlikely(is_end(node))) { - node = next = NULL; - break; - } - if (caa_unlikely(node->p.reverse_hash > reverse_hash)) { - node = next = NULL; - break; - } - next = rcu_dereference(node->p.next); - assert(node == clear_flag(node)); - if (caa_likely(!is_removed(next)) - && !is_dummy(next) - && node->p.reverse_hash == reverse_hash - && caa_likely(!ht->compare_fct(node->key, node->key_len, key, key_len))) { - break; - } - node = clear_flag(next); - } - assert(!node || !is_dummy(rcu_dereference(node->p.next))); - iter->node = node; - iter->next = next; -} - -void cds_lfht_next_duplicate(struct cds_lfht *ht, struct cds_lfht_iter *iter) -{ - struct cds_lfht_node *node, *next; - unsigned long reverse_hash; - void *key; - size_t key_len; - - node = iter->node; - reverse_hash = node->p.reverse_hash; - key = node->key; - key_len = node->key_len; - next = iter->next; - node = clear_flag(next); - - for (;;) { - if (caa_unlikely(is_end(node))) { - node = next = NULL; - break; - } - if (caa_unlikely(node->p.reverse_hash > reverse_hash)) { - node = next = NULL; - break; - } - next = rcu_dereference(node->p.next); - if (caa_likely(!is_removed(next)) - && !is_dummy(next) - && caa_likely(!ht->compare_fct(node->key, node->key_len, key, key_len))) { - break; - } - node = clear_flag(next); - } - assert(!node || !is_dummy(rcu_dereference(node->p.next))); - iter->node = node; - iter->next = next; -} - -void cds_lfht_next(struct cds_lfht *ht, struct cds_lfht_iter *iter) -{ - struct cds_lfht_node *node, *next; - - node = clear_flag(iter->next); - for (;;) { - if (caa_unlikely(is_end(node))) { - node = next = NULL; - break; - } - next = rcu_dereference(node->p.next); - if (caa_likely(!is_removed(next)) - && !is_dummy(next)) { - break; - } - node = clear_flag(next); - } - assert(!node || !is_dummy(rcu_dereference(node->p.next))); - iter->node = node; - iter->next = next; -} - -void cds_lfht_first(struct cds_lfht *ht, struct cds_lfht_iter *iter) -{ - struct _cds_lfht_node *lookup; - - /* - * Get next after first dummy node. The first dummy node is the - * first node of the linked list. 
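The lookup entry points defined above are meant to be called from registered RCU reader threads, with the read lock held across cds_lfht_lookup and any following cds_lfht_next_duplicate calls. A minimal usage sketch follows; struct my_entry, its fields and visit_matches are hypothetical, only the cds_lfht calls come from this file.

#include <urcu.h>		/* RCU flavor, included before the hash table header */
#include "rculfhash.h"

struct my_entry {			/* hypothetical user structure */
	int value;
	struct cds_lfht_node node;	/* embedded hash table node */
};

/* Visit every entry matching a key, under the RCU read lock. */
static void visit_matches(struct cds_lfht *ht, char *key, size_t key_len)
{
	struct cds_lfht_iter iter;
	struct cds_lfht_node *ht_node;

	rcu_read_lock();
	cds_lfht_lookup(ht, key, key_len, &iter);
	while ((ht_node = cds_lfht_iter_get_node(&iter)) != NULL) {
		struct my_entry *entry =
			caa_container_of(ht_node, struct my_entry, node);

		/* ... use entry->value ... */
		(void) entry;
		cds_lfht_next_duplicate(ht, &iter);
	}
	rcu_read_unlock();
}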
- */ - lookup = &ht->t.tbl[0]->nodes[0]; - iter->next = lookup->next; - cds_lfht_next(ht, iter); -} - -void cds_lfht_add(struct cds_lfht *ht, struct cds_lfht_node *node) -{ - unsigned long hash, size; - - hash = ht->hash_fct(node->key, node->key_len, ht->hash_seed); - node->p.reverse_hash = bit_reverse_ulong((unsigned long) hash); - - size = rcu_dereference(ht->t.size); - _cds_lfht_add(ht, size, node, NULL, 0); - ht_count_add(ht, size, hash); -} - -struct cds_lfht_node *cds_lfht_add_unique(struct cds_lfht *ht, - struct cds_lfht_node *node) -{ - unsigned long hash, size; - struct cds_lfht_iter iter; - - hash = ht->hash_fct(node->key, node->key_len, ht->hash_seed); - node->p.reverse_hash = bit_reverse_ulong((unsigned long) hash); - - size = rcu_dereference(ht->t.size); - _cds_lfht_add(ht, size, node, &iter, 0); - if (iter.node == node) - ht_count_add(ht, size, hash); - return iter.node; -} - -struct cds_lfht_node *cds_lfht_add_replace(struct cds_lfht *ht, - struct cds_lfht_node *node) -{ - unsigned long hash, size; - struct cds_lfht_iter iter; - - hash = ht->hash_fct(node->key, node->key_len, ht->hash_seed); - node->p.reverse_hash = bit_reverse_ulong((unsigned long) hash); - - size = rcu_dereference(ht->t.size); - for (;;) { - _cds_lfht_add(ht, size, node, &iter, 0); - if (iter.node == node) { - ht_count_add(ht, size, hash); - return NULL; - } - - if (!_cds_lfht_replace(ht, size, iter.node, iter.next, node)) - return iter.node; - } -} - -int cds_lfht_replace(struct cds_lfht *ht, struct cds_lfht_iter *old_iter, - struct cds_lfht_node *new_node) -{ - unsigned long size; - - size = rcu_dereference(ht->t.size); - return _cds_lfht_replace(ht, size, old_iter->node, old_iter->next, - new_node); -} - -int cds_lfht_del(struct cds_lfht *ht, struct cds_lfht_iter *iter) -{ - unsigned long size, hash; - int ret; - - size = rcu_dereference(ht->t.size); - ret = _cds_lfht_del(ht, size, iter->node, 0); - if (!ret) { - hash = bit_reverse_ulong(iter->node->p.reverse_hash); - ht_count_del(ht, size, hash); - } - return ret; -} - -static -int cds_lfht_delete_dummy(struct cds_lfht *ht) -{ - struct cds_lfht_node *node; - struct _cds_lfht_node *lookup; - unsigned long order, i, size; - - /* Check that the table is empty */ - lookup = &ht->t.tbl[0]->nodes[0]; - node = (struct cds_lfht_node *) lookup; - do { - node = clear_flag(node)->p.next; - if (!is_dummy(node)) - return -EPERM; - assert(!is_removed(node)); - } while (!is_end(node)); - /* - * size accessed without rcu_dereference because hash table is - * being destroyed. - */ - size = ht->t.size; - /* Internal sanity check: all nodes left should be dummy */ - for (order = 0; order < get_count_order_ulong(size) + 1; order++) { - unsigned long len; - - len = !order ? 1 : 1UL << (order - 1); - for (i = 0; i < len; i++) { - dbg_printf("delete order %lu i %lu hash %lu\n", - order, i, - bit_reverse_ulong(ht->t.tbl[order]->nodes[i].reverse_hash)); - assert(is_dummy(ht->t.tbl[order]->nodes[i].next)); - } - - if (order == ht->min_alloc_order) - poison_free(ht->t.tbl[0]); - else if (order > ht->min_alloc_order) - poison_free(ht->t.tbl[order]); - /* Nothing to delete for order < ht->min_alloc_order */ - } - return 0; -} - -/* - * Should only be called when no more concurrent readers nor writers can - * possibly access the table. 
- */ -int cds_lfht_destroy(struct cds_lfht *ht, pthread_attr_t **attr) -{ - int ret; - - /* Wait for in-flight resize operations to complete */ - _CMM_STORE_SHARED(ht->in_progress_destroy, 1); - cmm_smp_mb(); /* Store destroy before load resize */ - while (uatomic_read(&ht->in_progress_resize)) - poll(NULL, 0, 100); /* wait for 100ms */ - ret = cds_lfht_delete_dummy(ht); - if (ret) - return ret; - free_split_items_count(ht); - if (attr) - *attr = ht->resize_attr; - poison_free(ht); - return ret; -} - -void cds_lfht_count_nodes(struct cds_lfht *ht, - long *approx_before, - unsigned long *count, - unsigned long *removed, - long *approx_after) -{ - struct cds_lfht_node *node, *next; - struct _cds_lfht_node *lookup; - unsigned long nr_dummy = 0; - - *approx_before = 0; - if (ht->split_count) { - int i; - - for (i = 0; i < split_count_mask + 1; i++) { - *approx_before += uatomic_read(&ht->split_count[i].add); - *approx_before -= uatomic_read(&ht->split_count[i].del); - } - } - - *count = 0; - *removed = 0; - - /* Count non-dummy nodes in the table */ - lookup = &ht->t.tbl[0]->nodes[0]; - node = (struct cds_lfht_node *) lookup; - do { - next = rcu_dereference(node->p.next); - if (is_removed(next)) { - if (!is_dummy(next)) - (*removed)++; - else - (nr_dummy)++; - } else if (!is_dummy(next)) - (*count)++; - else - (nr_dummy)++; - node = clear_flag(next); - } while (!is_end(node)); - dbg_printf("number of dummy nodes: %lu\n", nr_dummy); - *approx_after = 0; - if (ht->split_count) { - int i; - - for (i = 0; i < split_count_mask + 1; i++) { - *approx_after += uatomic_read(&ht->split_count[i].add); - *approx_after -= uatomic_read(&ht->split_count[i].del); - } - } -} - -/* called with resize mutex held */ -static -void _do_cds_lfht_grow(struct cds_lfht *ht, - unsigned long old_size, unsigned long new_size) -{ - unsigned long old_order, new_order; - - old_order = get_count_order_ulong(old_size); - new_order = get_count_order_ulong(new_size); - dbg_printf("resize from %lu (order %lu) to %lu (order %lu) buckets\n", - old_size, old_order, new_size, new_order); - assert(new_size > old_size); - init_table(ht, old_order + 1, new_order); -} - -/* called with resize mutex held */ -static -void _do_cds_lfht_shrink(struct cds_lfht *ht, - unsigned long old_size, unsigned long new_size) -{ - unsigned long old_order, new_order; - - new_size = max(new_size, ht->min_alloc_size); - old_order = get_count_order_ulong(old_size); - new_order = get_count_order_ulong(new_size); - dbg_printf("resize from %lu (order %lu) to %lu (order %lu) buckets\n", - old_size, old_order, new_size, new_order); - assert(new_size < old_size); - - /* Remove and unlink all dummy nodes to remove. */ - fini_table(ht, new_order + 1, old_order); -} - - -/* called with resize mutex held */ -static -void _do_cds_lfht_resize(struct cds_lfht *ht) -{ - unsigned long new_size, old_size; - - /* - * Resize table, re-do if the target size has changed under us. 
- */ - do { - assert(uatomic_read(&ht->in_progress_resize)); - if (CMM_LOAD_SHARED(ht->in_progress_destroy)) - break; - ht->t.resize_initiated = 1; - old_size = ht->t.size; - new_size = CMM_LOAD_SHARED(ht->t.resize_target); - if (old_size < new_size) - _do_cds_lfht_grow(ht, old_size, new_size); - else if (old_size > new_size) - _do_cds_lfht_shrink(ht, old_size, new_size); - ht->t.resize_initiated = 0; - /* write resize_initiated before read resize_target */ - cmm_smp_mb(); - } while (ht->t.size != CMM_LOAD_SHARED(ht->t.resize_target)); -} - -static -unsigned long resize_target_update(struct cds_lfht *ht, unsigned long size, - int growth_order) -{ - return _uatomic_max(&ht->t.resize_target, - size << growth_order); -} - -static -void resize_target_update_count(struct cds_lfht *ht, - unsigned long count) -{ - count = max(count, ht->min_alloc_size); - uatomic_set(&ht->t.resize_target, count); -} - -void cds_lfht_resize(struct cds_lfht *ht, unsigned long new_size) -{ - resize_target_update_count(ht, new_size); - CMM_STORE_SHARED(ht->t.resize_initiated, 1); - ht->cds_lfht_rcu_thread_offline(); - pthread_mutex_lock(&ht->resize_mutex); - _do_cds_lfht_resize(ht); - pthread_mutex_unlock(&ht->resize_mutex); - ht->cds_lfht_rcu_thread_online(); -} - -static -void do_resize_cb(struct rcu_head *head) -{ - struct rcu_resize_work *work = - caa_container_of(head, struct rcu_resize_work, head); - struct cds_lfht *ht = work->ht; - - ht->cds_lfht_rcu_thread_offline(); - pthread_mutex_lock(&ht->resize_mutex); - _do_cds_lfht_resize(ht); - pthread_mutex_unlock(&ht->resize_mutex); - ht->cds_lfht_rcu_thread_online(); - poison_free(work); - cmm_smp_mb(); /* finish resize before decrement */ - uatomic_dec(&ht->in_progress_resize); -} - -static -void cds_lfht_resize_lazy(struct cds_lfht *ht, unsigned long size, int growth) -{ - struct rcu_resize_work *work; - unsigned long target_size; - - target_size = resize_target_update(ht, size, growth); - /* Store resize_target before read resize_initiated */ - cmm_smp_mb(); - if (!CMM_LOAD_SHARED(ht->t.resize_initiated) && size < target_size) { - uatomic_inc(&ht->in_progress_resize); - cmm_smp_mb(); /* increment resize count before load destroy */ - if (CMM_LOAD_SHARED(ht->in_progress_destroy)) { - uatomic_dec(&ht->in_progress_resize); - return; - } - work = malloc(sizeof(*work)); - work->ht = ht; - ht->cds_lfht_call_rcu(&work->head, do_resize_cb); - CMM_STORE_SHARED(ht->t.resize_initiated, 1); - } -} - -static -void cds_lfht_resize_lazy_count(struct cds_lfht *ht, unsigned long size, - unsigned long count) -{ - struct rcu_resize_work *work; - - if (!(ht->flags & CDS_LFHT_AUTO_RESIZE)) - return; - resize_target_update_count(ht, count); - /* Store resize_target before read resize_initiated */ - cmm_smp_mb(); - if (!CMM_LOAD_SHARED(ht->t.resize_initiated)) { - uatomic_inc(&ht->in_progress_resize); - cmm_smp_mb(); /* increment resize count before load destroy */ - if (CMM_LOAD_SHARED(ht->in_progress_destroy)) { - uatomic_dec(&ht->in_progress_resize); - return; - } - work = malloc(sizeof(*work)); - work->ht = ht; - ht->cds_lfht_call_rcu(&work->head, do_resize_cb); - CMM_STORE_SHARED(ht->t.resize_initiated, 1); - } -} diff --git a/common/hashtable/rculfhash.h b/common/hashtable/rculfhash.h deleted file mode 100644 index 719cd58f7..000000000 --- a/common/hashtable/rculfhash.h +++ /dev/null @@ -1,389 +0,0 @@ -#ifndef _URCU_RCULFHASH_H -#define _URCU_RCULFHASH_H - -/* - * urcu/rculfhash.h - * - * Userspace RCU library - Lock-Free RCU Hash Table - * - * Copyright 2011 - Mathieu 
Desnoyers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * - * Include this file _after_ including your URCU flavor. - */ - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * struct cds_lfht_node and struct _cds_lfht_node should be aligned on - * 4-bytes boundaries because the two lower bits are used as flags. - */ - -/* - * _cds_lfht_node: Contains the internal pointers and reverse-hash - * value required for lookup and traversal of the hash table. - */ -struct _cds_lfht_node { - struct cds_lfht_node *next; /* ptr | DUMMY_FLAG | REMOVED_FLAG */ - unsigned long reverse_hash; -} __attribute__((aligned(4))); - -/* - * cds_lfht_node: Contains the full key and length required to check for - * an actual match, and also contains an rcu_head structure that is used - * by RCU to track a node through a given RCU grace period. There is an - * instance of _cds_lfht_node enclosed as a field within each - * _cds_lfht_node structure. - * - * struct cds_lfht_node can be embedded into a structure (as a field). - * caa_container_of() can be used to get the structure from the struct - * cds_lfht_node after a lookup. - */ -struct cds_lfht_node { - /* cache-hot for iteration */ - struct _cds_lfht_node p; /* needs to be first field */ - void *key; - unsigned int key_len; - /* cache-cold for iteration */ - struct rcu_head head; -}; - -/* cds_lfht_iter: Used to track state while traversing a hash chain. */ -struct cds_lfht_iter { - struct cds_lfht_node *node, *next; -}; - -static inline -struct cds_lfht_node *cds_lfht_iter_get_node(struct cds_lfht_iter *iter) -{ - return iter->node; -} - -struct cds_lfht; - -/* - * Caution ! - * Ensure reader and writer threads are registered as urcu readers. - */ - -typedef unsigned long (*cds_lfht_hash_fct)(void *key, size_t length, - unsigned long seed); -typedef unsigned long (*cds_lfht_compare_fct)(void *key1, size_t key1_len, - void *key2, size_t key2_len); - -/* - * cds_lfht_node_init - initialize a hash table node - */ -static inline -void cds_lfht_node_init(struct cds_lfht_node *node, void *key, - size_t key_len) -{ - node->key = key; - node->key_len = key_len; -} - -/* - * Hash table creation flags. - */ -enum { - CDS_LFHT_AUTO_RESIZE = (1U << 0), - CDS_LFHT_ACCOUNTING = (1U << 1), -}; - -/* - * _cds_lfht_new - API used by cds_lfht_new wrapper. Do not use directly. 
- */ -struct cds_lfht *_cds_lfht_new(cds_lfht_hash_fct hash_fct, - cds_lfht_compare_fct compare_fct, - unsigned long hash_seed, - unsigned long init_size, - unsigned long min_alloc_size, - int flags, - void (*cds_lfht_call_rcu)(struct rcu_head *head, - void (*func)(struct rcu_head *head)), - void (*cds_lfht_synchronize_rcu)(void), - void (*cds_lfht_rcu_read_lock)(void), - void (*cds_lfht_rcu_read_unlock)(void), - void (*cds_lfht_rcu_thread_offline)(void), - void (*cds_lfht_rcu_thread_online)(void), - void (*cds_lfht_rcu_register_thread)(void), - void (*cds_lfht_rcu_unregister_thread)(void), - pthread_attr_t *attr); - -/* - * cds_lfht_new - allocate a hash table. - * @hash_fct: the hashing function. - * @compare_fct: the key comparison function. - * @hash_seed: the seed for hash function. - * @init_size: number of nodes to allocate initially. Must be power of two. - * @min_alloc_size: the smallest allocation size to use. Must be power of two. - * @flags: hash table creation flags (can be combined with bitwise or: '|'). - * 0: no flags. - * CDS_LFHT_AUTO_RESIZE: automatically resize hash table. - * @attr: optional resize worker thread attributes. NULL for default. - * - * Return NULL on error. - * Note: the RCU flavor must be already included before the hash table header. - * - * The programmer is responsible for ensuring that resize operation has a - * priority equal to hash table updater threads. It should be performed by - * specifying the appropriate priority in the pthread "attr" argument, and, - * for CDS_LFHT_AUTO_RESIZE, by ensuring that call_rcu worker threads also have - * this priority level. Having lower priority for call_rcu and resize threads - * does not pose any correctness issue, but the resize operations could be - * starved by updates, thus leading to long hash table bucket chains. - * Threads calling this API need to be registered RCU read-side threads. - */ -static inline -struct cds_lfht *cds_lfht_new(cds_lfht_hash_fct hash_fct, - cds_lfht_compare_fct compare_fct, - unsigned long hash_seed, - unsigned long init_size, - unsigned long min_alloc_size, - int flags, - pthread_attr_t *attr) -{ - return _cds_lfht_new(hash_fct, compare_fct, hash_seed, - init_size, min_alloc_size, flags, - call_rcu, synchronize_rcu, rcu_read_lock, - rcu_read_unlock, rcu_thread_offline, - rcu_thread_online, rcu_register_thread, - rcu_unregister_thread, attr); -} - -/* - * cds_lfht_destroy - destroy a hash table. - * @ht: the hash table to destroy. - * @attr: (output) resize worker thread attributes, as received by cds_lfht_new. - * The caller will typically want to free this pointer if dynamically - * allocated. The attr point can be NULL if the caller does not - * need to be informed of the value passed to cds_lfht_new(). - * - * Return 0 on success, negative error value on error. - * Threads calling this API need to be registered RCU read-side threads. - */ -int cds_lfht_destroy(struct cds_lfht *ht, pthread_attr_t **attr); - -/* - * cds_lfht_count_nodes - count the number of nodes in the hash table. - * @ht: the hash table. - * @split_count_before: Sample the node count split-counter before traversal. - * @count: Traverse the hash table, count the number of nodes observed. - * @removed: Number of logically removed nodes observed during traversal. - * @split_count_after: Sample the node count split-counter after traversal. - * Call with rcu_read_lock held. - * Threads calling this API need to be registered RCU read-side threads. 
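A hedged sketch of table creation through the cds_lfht_new wrapper described above: the hash and compare callbacks below are simplistic placeholders (the in-tree callers use the hash functions shipped alongside this code), the 0x42UL seed and the sizes are example values only, and the calling thread is assumed to be a registered RCU reader. Compare callbacks must return 0 on a match, which is what the lookup path expects.

#include <string.h>
#include <urcu.h>
#include "rculfhash.h"

/* Toy callbacks: keys are byte buffers of the given length. */
static unsigned long demo_hash(void *key, size_t length, unsigned long seed)
{
	unsigned long hash = seed;
	unsigned char *p = key;
	size_t i;

	for (i = 0; i < length; i++)
		hash = hash * 31 + p[i];
	return hash;
}

/* Return 0 when both keys are equal, non-zero otherwise. */
static unsigned long demo_compare(void *key1, size_t key1_len,
		void *key2, size_t key2_len)
{
	if (key1_len != key2_len)
		return 1;
	return memcmp(key1, key2, key1_len);
}

static struct cds_lfht *demo_create(void)
{
	/* init_size and min_alloc_size must both be powers of two. */
	return cds_lfht_new(demo_hash, demo_compare, 0x42UL,
			16, 4, CDS_LFHT_AUTO_RESIZE, NULL);
}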
- */ -void cds_lfht_count_nodes(struct cds_lfht *ht, - long *split_count_before, - unsigned long *count, - unsigned long *removed, - long *split_count_after); - -/* - * cds_lfht_lookup - lookup a node by key. - * - * Output in "*iter". *iter->node set to NULL if not found. - * Call with rcu_read_lock held. - * Threads calling this API need to be registered RCU read-side threads. - */ -void cds_lfht_lookup(struct cds_lfht *ht, void *key, size_t key_len, - struct cds_lfht_iter *iter); - -/* - * cds_lfht_next_duplicate - get the next item with same key (after a lookup). - * - * Uses an iterator initialized by a lookup. - * Sets *iter-node to the following node with same key. - * Sets *iter->node to NULL if no following node exists with same key. - * RCU read-side lock must be held across cds_lfht_lookup and - * cds_lfht_next calls, and also between cds_lfht_next calls using the - * node returned by a previous cds_lfht_next. - * Call with rcu_read_lock held. - * Threads calling this API need to be registered RCU read-side threads. - */ -void cds_lfht_next_duplicate(struct cds_lfht *ht, struct cds_lfht_iter *iter); - -/* - * cds_lfht_first - get the first node in the table. - * - * Output in "*iter". *iter->node set to NULL if table is empty. - * Call with rcu_read_lock held. - * Threads calling this API need to be registered RCU read-side threads. - */ -void cds_lfht_first(struct cds_lfht *ht, struct cds_lfht_iter *iter); - -/* - * cds_lfht_next - get the next node in the table. - * - * Input/Output in "*iter". *iter->node set to NULL if *iter was - * pointing to the last table node. - * Call with rcu_read_lock held. - * Threads calling this API need to be registered RCU read-side threads. - */ -void cds_lfht_next(struct cds_lfht *ht, struct cds_lfht_iter *iter); - -/* - * cds_lfht_add - add a node to the hash table. - * - * This function supports adding redundant keys into the table. - * Call with rcu_read_lock held. - * Threads calling this API need to be registered RCU read-side threads. - */ -void cds_lfht_add(struct cds_lfht *ht, struct cds_lfht_node *node); - -/* - * cds_lfht_add_unique - add a node to hash table, if key is not present. - * - * Return the node added upon success. - * Return the unique node already present upon failure. If - * cds_lfht_add_unique fails, the node passed as parameter should be - * freed by the caller. - * Call with rcu_read_lock held. - * Threads calling this API need to be registered RCU read-side threads. - * - * The semantic of this function is that if only this function is used - * to add keys into the table, no duplicated keys should ever be - * observable in the table. The same guarantee apply for combination of - * add_unique and add_replace (see below). - */ -struct cds_lfht_node *cds_lfht_add_unique(struct cds_lfht *ht, - struct cds_lfht_node *node); - -/* - * cds_lfht_add_replace - replace or add a node within hash table. - * - * Return the node replaced upon success. If no node matching the key - * was present, return NULL, which also means the operation succeeded. - * This replacement operation should never fail. - * Call with rcu_read_lock held. - * Threads calling this API need to be registered RCU read-side threads. - * After successful replacement, a grace period must be waited for before - * freeing the memory reserved for the returned node. 
- * - * The semantics of replacement vs lookups are the following: if lookups - * are performed between a key unique insertion and its removal, we - * guarantee that the lookups and get next will always find exactly one - * instance of the key if it is replaced concurrently with the lookups. - * - * Providing this semantic allows us to ensure that replacement-only - * schemes will never generate duplicated keys. It also allows us to - * guarantee that a combination of add_replace and add_unique updates - * will never generate duplicated keys. - */ -struct cds_lfht_node *cds_lfht_add_replace(struct cds_lfht *ht, - struct cds_lfht_node *node); - -/* - * cds_lfht_replace - replace a node pointed to by iter within hash table. - * - * Return 0 if replacement is successful, negative value otherwise. - * Replacing a NULL old node or an already removed node will fail with a - * negative value. - * Old node can be looked up with cds_lfht_lookup and cds_lfht_next. - * RCU read-side lock must be held between lookup and replacement. - * Call with rcu_read_lock held. - * Threads calling this API need to be registered RCU read-side threads. - * After successful replacement, a grace period must be waited for before - * freeing the memory reserved for the old node (which can be accessed - * with cds_lfht_iter_get_node). - * - * The semantics of replacement vs lookups are the following: if lookups - * are performed between a key unique insertion and its removal, we - * guarantee that the lookups and get next will always find exactly one - * instance of the key if it is replaced concurrently with the lookups. - * - * Providing this semantic allows us to ensure that replacement-only - * schemes will never generate duplicated keys. It also allows us to - * guarantee that a combination of add_replace and add_unique updates - * will never generate duplicated keys. - */ -int cds_lfht_replace(struct cds_lfht *ht, struct cds_lfht_iter *old_iter, - struct cds_lfht_node *new_node); - -/* - * cds_lfht_del - remove node pointed to by iterator from hash table. - * - * Return 0 if the node is successfully removed, negative value - * otherwise. - * Removing a NULL node or an already removed node will fail with a - * negative value. - * Node can be looked up with cds_lfht_lookup and cds_lfht_next, and - * retrieved with cds_lfht_iter_get_node. - * RCU read-side lock must be held between lookup and removal. - * Call with rcu_read_lock held. - * Threads calling this API need to be registered RCU read-side threads. - * After successful removal, a grace period must be waited for before - * freeing the memory reserved for the old node (which can be accessed with - * cds_lfht_iter_get_node). - */ -int cds_lfht_del(struct cds_lfht *ht, struct cds_lfht_iter *iter); - -/* - * cds_lfht_resize - Force a hash table resize - * @new_size: update to this hash table size. - * - * Threads calling this API need to be registered RCU read-side threads. - */ -void cds_lfht_resize(struct cds_lfht *ht, unsigned long new_size); - -/* - * Note: the cds_lfht_for_each iterators are safe for element removal during - * iteration.
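The grace-period requirement spelled out above is the easiest part to get wrong. Below is a sketch of a removal helper that defers the free with call_rcu; struct demo_entry, free_entry and demo_remove are hypothetical, the cds_lfht calls follow the prototypes documented above, and the caller is assumed to be a registered RCU reader thread.

#include <stdlib.h>
#include <string.h>
#include <urcu.h>
#include <urcu-call-rcu.h>
#include "rculfhash.h"

struct demo_entry {			/* hypothetical user structure */
	char *key;
	struct cds_lfht_node node;
	struct rcu_head rcu;
};

static void free_entry(struct rcu_head *head)
{
	struct demo_entry *entry =
		caa_container_of(head, struct demo_entry, rcu);

	free(entry->key);
	free(entry);
}

/* Remove one entry by key; its memory is reclaimed after a grace period. */
static int demo_remove(struct cds_lfht *ht, char *key)
{
	struct cds_lfht_iter iter;
	struct cds_lfht_node *node;
	int ret;

	rcu_read_lock();
	cds_lfht_lookup(ht, key, strlen(key), &iter);
	node = cds_lfht_iter_get_node(&iter);
	if (!node) {
		rcu_read_unlock();
		return -1;
	}
	ret = cds_lfht_del(ht, &iter);
	rcu_read_unlock();
	if (!ret) {
		struct demo_entry *entry =
			caa_container_of(node, struct demo_entry, node);

		call_rcu(&entry->rcu, free_entry);
	}
	return ret;
}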
- */ -#define cds_lfht_for_each(ht, iter, node) \ - for (cds_lfht_first(ht, iter), \ - node = cds_lfht_iter_get_node(iter); \ - node != NULL; \ - cds_lfht_next(ht, iter), \ - node = cds_lfht_iter_get_node(iter)) - -#define cds_lfht_for_each_duplicate(ht, match, hash, key, iter, node) \ - for (cds_lfht_lookup(ht, match, hash, key, iter), \ - node = cds_lfht_iter_get_node(iter); \ - node != NULL; \ - cds_lfht_next_duplicate(ht, match, key, iter), \ - node = cds_lfht_iter_get_node(iter)) - -#define cds_lfht_for_each_entry(ht, iter, pos, member) \ - for (cds_lfht_first(ht, iter), \ - pos = caa_container_of(cds_lfht_iter_get_node(iter), \ - typeof(*(pos)), member); \ - &(pos)->member != NULL; \ - cds_lfht_next(ht, iter), \ - pos = caa_container_of(cds_lfht_iter_get_node(iter), \ - typeof(*(pos)), member)) - -#define cds_lfht_for_each_entry_duplicate(ht, match, hash, key, \ - iter, pos, member) \ -for (cds_lfht_lookup(ht, match, hash, key, iter), \ - pos = caa_container_of(cds_lfht_iter_get_node(iter), \ - typeof(*(pos)), member); \ - &(pos)->member != NULL; \ - cds_lfht_next_duplicate(ht, match, key, iter), \ - pos = caa_container_of(cds_lfht_iter_get_node(iter), \ - typeof(*(pos)), member)) - -#ifdef __cplusplus -} -#endif - -#endif /* _URCU_RCULFHASH_H */ diff --git a/configure.ac b/configure.ac index a55dfd55d..6bddf8783 100644 --- a/configure.ac +++ b/configure.ac @@ -139,6 +139,7 @@ AC_CONFIG_FILES([ liblttng-ustconsumer/Makefile liblttngctl/Makefile liblttng-sessiond-comm/Makefile + liblttng-ht/Makefile lttng-consumerd/Makefile lttng-sessiond/Makefile lttng/Makefile diff --git a/include/Makefile.am b/include/Makefile.am index 10caa6958..00120a6a0 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -1,6 +1,5 @@ lttnginclude_HEADERS = lttng/lttng.h lttng/lttng-kconsumer.h \ - lttng/lttng-ustconsumer.h lttng/lttng-consumer.h + lttng/lttng-ustconsumer.h lttng/lttng-consumer.h noinst_HEADERS = lttngerr.h lttng-kernel.h lttng-consumerd.h lttng-share.h \ - lttng-sessiond-comm.h lttng-kernel-ctl.h \ - runas.h + lttng-sessiond-comm.h lttng-kernel-ctl.h lttng-ht.h runas.h diff --git a/include/lttng-ht.h b/include/lttng-ht.h new file mode 100644 index 000000000..649ffcf54 --- /dev/null +++ b/include/lttng-ht.h @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2011 - David Goulet + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; only version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef _LTT_HT_H +#define _LTT_HT_H + +#include +#include "../liblttng-ht/rculfhash.h" +#include "../liblttng-ht/rculfhash-internal.h" + +typedef unsigned long (*hash_fct)(void *_key, unsigned long seed); +typedef cds_lfht_match_fct hash_match_fct; + +enum lttng_ht_type { + LTTNG_HT_TYPE_STRING, + LTTNG_HT_TYPE_ULONG, +}; + +struct lttng_ht { + struct cds_lfht *ht; + cds_lfht_match_fct match_fct; + hash_fct hash_fct; +}; + +struct lttng_ht_iter { + struct cds_lfht_iter iter; +}; + +struct lttng_ht_node_str { + char *key; + struct cds_lfht_node node; + struct rcu_head head; +}; + +struct lttng_ht_node_ulong { + unsigned long key; + struct cds_lfht_node node; + struct rcu_head head; +}; + +/* Hashtable new and destroy */ +extern struct lttng_ht *lttng_ht_new(unsigned long size, int type); +extern void lttng_ht_destroy(struct lttng_ht *ht); + +/* Specialized node init and free functions */ +extern void lttng_ht_node_init_str(struct lttng_ht_node_str *node, char *key); +extern void lttng_ht_node_init_ulong(struct lttng_ht_node_ulong *node, + unsigned long key); +extern void lttng_ht_node_free_str(struct lttng_ht_node_str *node); +extern void lttng_ht_node_free_ulong(struct lttng_ht_node_ulong *node); + +extern void lttng_ht_lookup(struct lttng_ht *ht, void *key, + struct lttng_ht_iter *iter); + +/* Specialized add unique functions */ +extern void lttng_ht_add_unique_str(struct lttng_ht *ht, + struct lttng_ht_node_str *node); +extern void lttng_ht_add_unique_ulong(struct lttng_ht *ht, + struct lttng_ht_node_ulong *node); + +extern int lttng_ht_del(struct lttng_ht *ht, struct lttng_ht_iter *iter); + +extern void lttng_ht_get_first(struct lttng_ht *ht, + struct lttng_ht_iter *iter); +extern void lttng_ht_get_next(struct lttng_ht *ht, struct lttng_ht_iter *iter); + +extern unsigned long lttng_ht_get_count(struct lttng_ht *ht); + +extern struct lttng_ht_node_str *lttng_ht_iter_get_node_str( + struct lttng_ht_iter *iter); +extern struct lttng_ht_node_ulong *lttng_ht_iter_get_node_ulong( + struct lttng_ht_iter *iter); + +#endif /* _LTT_HT_H */ diff --git a/include/lttng-share.h b/include/lttng-share.h index 708a1f009..41d659b6f 100644 --- a/include/lttng-share.h +++ b/include/lttng-share.h @@ -24,7 +24,7 @@ #include /* Default size of a hash table */ -#define DEFAULT_HT_SIZE 32 +#define DEFAULT_HT_SIZE 4 /* Default channel attributes */ #define DEFAULT_CHANNEL_NAME "channel0" diff --git a/liblttng-ht/Makefile.am b/liblttng-ht/Makefile.am new file mode 100644 index 000000000..bd85018d7 --- /dev/null +++ b/liblttng-ht/Makefile.am @@ -0,0 +1,13 @@ +AM_CPPFLAGS = -I$(top_srcdir)/include + +noinst_LTLIBRARIES = liblttng-ht.la + +liblttng_ht_la_SOURCES = lttng-ht.c \ + utils.c utils.h \ + rculfhash-internal.h urcu-flavor.h \ + rculfhash.h rculfhash.c \ + rculfhash-mm-chunk.c \ + rculfhash-mm-mmap.c \ + rculfhash-mm-order.c + +liblttng_ht_la_LIBADD = -lurcu-common -lurcu diff --git a/liblttng-ht/lttng-ht.c b/liblttng-ht/lttng-ht.c new file mode 100644 index 000000000..608b3af10 --- /dev/null +++ b/liblttng-ht/lttng-ht.c @@ -0,0 +1,284 @@ +/* + * Copyright (C) 2011 - David Goulet + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; only version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include + +#include +#include +#include + +#include "utils.h" + +#define HASH_SEED 0x42UL /* The answer to life */ + +static unsigned long min_hash_alloc_size = 1; +static unsigned long max_hash_buckets_size = (1UL << 20); + +/* + * Match function for string node. + */ +static int match_str(struct cds_lfht_node *node, const void *key) +{ + struct lttng_ht_node_str *match_node = + caa_container_of(node, struct lttng_ht_node_str, node); + + return hash_match_key_str(match_node->key, (void *) key); +} + +/* + * Match function for ulong node. + */ +static int match_ulong(struct cds_lfht_node *node, const void *key) +{ + struct lttng_ht_node_ulong *match_node = + caa_container_of(node, struct lttng_ht_node_ulong, node); + + return hash_match_key_ulong((void *) match_node->key, (void *) key); +} + +/* + * Return an allocated lttng hashtable. + */ +struct lttng_ht *lttng_ht_new(unsigned long size, int type) +{ + struct lttng_ht *ht; + + /* Test size */ + size != 0 ? : (size = DEFAULT_HT_SIZE); + + ht = zmalloc(sizeof(*ht)); + if (ht == NULL) { + PERROR("zmalloc lttng_ht"); + goto error; + } + + ht->ht = cds_lfht_new(size, min_hash_alloc_size, max_hash_buckets_size, + CDS_LFHT_AUTO_RESIZE, NULL); + /* + * There is already an assert in the RCU hashtable code so if the ht is + * NULL here there is a *huge* problem. + */ + assert(ht->ht); + + switch (type) { + case LTTNG_HT_TYPE_STRING: + ht->match_fct = match_str; + ht->hash_fct = hash_key_str; + break; + case LTTNG_HT_TYPE_ULONG: + ht->match_fct = match_ulong; + ht->hash_fct = hash_key_ulong; + break; + default: + ERR("Unknown lttng hashtable type %d", type); + goto error; + } + + DBG3("Created hashtable size %lu at %p of type %d", size, ht->ht, type); + + return ht; + +error: + return NULL; +} + +/* + * Free a lttng hashtable. + */ +void lttng_ht_destroy(struct lttng_ht *ht) +{ + int ret; + + ret = cds_lfht_destroy(ht->ht, NULL); + assert(!ret); +} + +/* + * Init lttng ht node string. + */ +void lttng_ht_node_init_str(struct lttng_ht_node_str *node, char *key) +{ + assert(node); + + node->key = key; + cds_lfht_node_init(&node->node); +} + +/* + * Init lttng ht node unsigned long. + */ +void lttng_ht_node_init_ulong(struct lttng_ht_node_ulong *node, + unsigned long key) +{ + assert(node); + + node->key = key; + cds_lfht_node_init(&node->node); +} + +/* + * Free lttng ht node string. + */ +void lttng_ht_node_free_str(struct lttng_ht_node_str *node) +{ + assert(node); + free(node); +} + +/* + * Free lttng ht node unsigned long. + */ +void lttng_ht_node_free_ulong(struct lttng_ht_node_ulong *node) +{ + assert(node); + free(node); +} + +/* + * Lookup function in hashtable. + */ +void lttng_ht_lookup(struct lttng_ht *ht, void *key, + struct lttng_ht_iter *iter) +{ + assert(ht); + assert(ht->ht); + assert(key); + + cds_lfht_lookup(ht->ht, ht->hash_fct(key, HASH_SEED), + ht->match_fct, key, &iter->iter); +} + +/* + * Add unique string node to hashtable. 
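For the wrapper API declared in lttng-ht.h, a usage sketch with a LTTNG_HT_TYPE_ULONG table follows. struct demo_elem and the key value are hypothetical, the calling thread is assumed to be a registered RCU reader, and the unsigned long key is passed to lttng_ht_lookup as the pointer value itself, which is what the ulong hash and match callbacks above expect.

#include <urcu.h>
#include "lttng-ht.h"		/* from include/, as added by this patch */

struct demo_elem {			/* hypothetical element */
	int state;
	struct lttng_ht_node_ulong node;
};

static void demo_ulong_table(void)
{
	struct lttng_ht *ht;
	struct lttng_ht_iter iter;
	struct lttng_ht_node_ulong *ht_node;
	struct demo_elem elem = { .state = 1 };
	int ret;

	ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);	/* 0 means DEFAULT_HT_SIZE */
	if (!ht)
		return;

	lttng_ht_node_init_ulong(&elem.node, 1234UL);

	rcu_read_lock();
	lttng_ht_add_unique_ulong(ht, &elem.node);

	/* The ulong key is passed as the pointer value itself. */
	lttng_ht_lookup(ht, (void *) 1234UL, &iter);
	ht_node = lttng_ht_iter_get_node_ulong(&iter);
	if (ht_node) {
		struct demo_elem *found =
			caa_container_of(ht_node, struct demo_elem, node);

		(void) found->state;
	}

	/* The table must be emptied before lttng_ht_destroy(). */
	ret = lttng_ht_del(ht, &iter);
	rcu_read_unlock();
	if (!ret)
		lttng_ht_destroy(ht);
}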
+ */ +void lttng_ht_add_unique_str(struct lttng_ht *ht, + struct lttng_ht_node_str *node) +{ + struct cds_lfht_node *node_ptr; + assert(ht); + assert(ht->ht); + assert(node); + + node_ptr = cds_lfht_add_unique(ht->ht, ht->hash_fct(node->key, HASH_SEED), + ht->match_fct, node->key, &node->node); + assert(node_ptr == &node->node); +} + +/* + * Add unique unsigned long node to hashtable. + */ +void lttng_ht_add_unique_ulong(struct lttng_ht *ht, + struct lttng_ht_node_ulong *node) +{ + struct cds_lfht_node *node_ptr; + assert(ht); + assert(ht->ht); + assert(node); + + node_ptr = cds_lfht_add_unique(ht->ht, + ht->hash_fct((void *) node->key, HASH_SEED), ht->match_fct, + (void *) node->key, &node->node); + assert(node_ptr == &node->node); +} + +/* + * Delete node from hashtable. + */ +int lttng_ht_del(struct lttng_ht *ht, struct lttng_ht_iter *iter) +{ + assert(ht); + assert(ht->ht); + assert(iter); + + return cds_lfht_del(ht->ht, iter->iter.node); +} + +/* + * Get first node in the hashtable. + */ +void lttng_ht_get_first(struct lttng_ht *ht, struct lttng_ht_iter *iter) +{ + assert(ht); + assert(ht->ht); + assert(iter); + + cds_lfht_first(ht->ht, &iter->iter); +} + +/* + * Get next node in the hashtable. + */ +void lttng_ht_get_next(struct lttng_ht *ht, struct lttng_ht_iter *iter) +{ + assert(ht); + assert(ht->ht); + assert(iter); + + cds_lfht_next(ht->ht, &iter->iter); +} + +/* + * Return the number of nodes in the hashtable. + */ +unsigned long lttng_ht_get_count(struct lttng_ht *ht) +{ + long scb, sca; + unsigned long count; + + assert(ht); + assert(ht->ht); + + cds_lfht_count_nodes(ht->ht, &scb, &count, &sca); + + return count; +} + +/* + * Return lttng ht string node from iterator. + */ +struct lttng_ht_node_str *lttng_ht_iter_get_node_str( + struct lttng_ht_iter *iter) +{ + struct cds_lfht_node *node; + + assert(iter); + node = cds_lfht_iter_get_node(&iter->iter); + if (!node) { + return NULL; + } + return caa_container_of(node, struct lttng_ht_node_str, node); +} + +/* + * Return lttng ht unsigned long node from iterator. + */ +struct lttng_ht_node_ulong *lttng_ht_iter_get_node_ulong( + struct lttng_ht_iter *iter) +{ + struct cds_lfht_node *node; + + assert(iter); + node = cds_lfht_iter_get_node(&iter->iter); + if (!node) { + return NULL; + } + return caa_container_of(node, struct lttng_ht_node_ulong, node); +} diff --git a/liblttng-ht/rculfhash-internal.h b/liblttng-ht/rculfhash-internal.h new file mode 100644 index 000000000..cb13ffa73 --- /dev/null +++ b/liblttng-ht/rculfhash-internal.h @@ -0,0 +1,177 @@ +#ifndef _URCU_RCULFHASH_INTERNAL_H +#define _URCU_RCULFHASH_INTERNAL_H + +/* + * urcu/rculfhash-internal.h + * + * Internal header for Lock-Free RCU Hash Table + * + * Copyright 2011 - Mathieu Desnoyers + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
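A companion sketch for the traversal helpers defined above (lttng_ht_get_first/lttng_ht_get_next and the node count), again assuming a registered RCU reader thread and a LTTNG_HT_TYPE_ULONG table; demo_walk itself is hypothetical.

#include <stdio.h>
#include <urcu.h>
#include "lttng-ht.h"

/* Walk every node of a LTTNG_HT_TYPE_ULONG table under the RCU read lock. */
static void demo_walk(struct lttng_ht *ht)
{
	struct lttng_ht_iter iter;
	struct lttng_ht_node_ulong *node;

	rcu_read_lock();
	printf("approximate node count: %lu\n", lttng_ht_get_count(ht));

	lttng_ht_get_first(ht, &iter);
	while ((node = lttng_ht_iter_get_node_ulong(&iter)) != NULL) {
		printf("key %lu\n", node->key);
		lttng_ht_get_next(ht, &iter);
	}
	rcu_read_unlock();
}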
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "rculfhash.h" + +#ifdef DEBUG +#define dbg_printf(fmt, args...) printf("[debug rculfhash] " fmt, ## args) +#else +#define dbg_printf(fmt, args...) +#endif + +#if (CAA_BITS_PER_LONG == 32) +#define MAX_TABLE_ORDER 32 +#else +#define MAX_TABLE_ORDER 64 +#endif + +#define MAX_CHUNK_TABLE (1UL << 10) + +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif + +#ifndef max +#define max(a, b) ((a) > (b) ? (a) : (b)) +#endif + +struct ht_items_count; + +/* + * cds_lfht: Top-level data structure representing a lock-free hash + * table. Defined in the implementation file to make it be an opaque + * cookie to users. + * + * The fields used in fast-paths are placed near the end of the + * structure, because we need to have a variable-sized union to contain + * the mm plugin fields, which are used in the fast path. + */ +struct cds_lfht { + /* Initial configuration items */ + unsigned long max_nr_buckets; + const struct cds_lfht_mm_type *mm; /* memory management plugin */ + const struct rcu_flavor_struct *flavor; /* RCU flavor */ + + long count; /* global approximate item count */ + + /* + * We need to put the work threads offline (QSBR) when taking this + * mutex, because we use synchronize_rcu within this mutex critical + * section, which waits on read-side critical sections, and could + * therefore cause grace-period deadlock if we hold off RCU G.P. + * completion. + */ + pthread_mutex_t resize_mutex; /* resize mutex: add/del mutex */ + pthread_attr_t *resize_attr; /* Resize threads attributes */ + unsigned int in_progress_resize, in_progress_destroy; + unsigned long resize_target; + int resize_initiated; + + /* + * Variables needed for add and remove fast-paths. + */ + int flags; + unsigned long min_alloc_buckets_order; + unsigned long min_nr_alloc_buckets; + struct ht_items_count *split_count; /* split item count */ + + /* + * Variables needed for the lookup, add and remove fast-paths. + */ + unsigned long size; /* always a power of 2, shared (RCU) */ + /* + * bucket_at pointer is kept here to skip the extra level of + * dereference needed to get to "mm" (this is a fast-path). + */ + struct cds_lfht_node *(*bucket_at)(struct cds_lfht *ht, + unsigned long index); + /* + * Dynamic length "tbl_chunk" needs to be at the end of + * cds_lfht. + */ + union { + /* + * Contains the per order-index-level bucket node table. + * The size of each bucket node table is half the number + * of hashes contained in this order (except for order 0). + * The minimum allocation buckets size parameter allows + * combining the bucket node arrays of the lowermost + * levels to improve cache locality for small index orders. + */ + struct cds_lfht_node *tbl_order[MAX_TABLE_ORDER]; + + /* + * Contains the bucket node chunks. The size of each + * bucket node chunk is ->min_alloc_size (we avoid to + * allocate chunks with different size). Chunks improve + * cache locality for small index orders, and are more + * friendly with environments where allocation of large + * contiguous memory areas is challenging due to memory + * fragmentation concerns or inability to use virtual + * memory addressing. + */ + struct cds_lfht_node *tbl_chunk[0]; + + /* + * Memory mapping with room for all possible buckets. + * Their memory is allocated when needed. 
+ */ + struct cds_lfht_node *tbl_mmap; + }; + /* + * End of variables needed for the lookup, add and remove + * fast-paths. + */ +}; + +extern unsigned int cds_lfht_fls_ulong(unsigned long x); +extern int cds_lfht_get_count_order_ulong(unsigned long x); + +#ifdef POISON_FREE +#define poison_free(ptr) \ + do { \ + if (ptr) { \ + memset(ptr, 0x42, sizeof(*(ptr))); \ + free(ptr); \ + } \ + } while (0) +#else +#define poison_free(ptr) free(ptr) +#endif + +static inline +struct cds_lfht *__default_alloc_cds_lfht( + const struct cds_lfht_mm_type *mm, + unsigned long cds_lfht_size, + unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets) +{ + struct cds_lfht *ht; + + ht = calloc(1, cds_lfht_size); + assert(ht); + + ht->mm = mm; + ht->bucket_at = mm->bucket_at; + ht->min_nr_alloc_buckets = min_nr_alloc_buckets; + ht->min_alloc_buckets_order = + cds_lfht_get_count_order_ulong(min_nr_alloc_buckets); + ht->max_nr_buckets = max_nr_buckets; + + return ht; +} + +#endif /* _URCU_RCULFHASH_INTERNAL_H */ diff --git a/liblttng-ht/rculfhash-mm-chunk.c b/liblttng-ht/rculfhash-mm-chunk.c new file mode 100644 index 000000000..7204831e5 --- /dev/null +++ b/liblttng-ht/rculfhash-mm-chunk.c @@ -0,0 +1,97 @@ +/* + * rculfhash-mm-chunk.c + * + * Chunk based memory management for Lock-Free RCU Hash Table + * + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include "rculfhash-internal.h" + +static +void cds_lfht_alloc_bucket_table(struct cds_lfht *ht, unsigned long order) +{ + if (order == 0) { + ht->tbl_chunk[0] = calloc(ht->min_nr_alloc_buckets, + sizeof(struct cds_lfht_node)); + assert(ht->tbl_chunk[0]); + } else if (order > ht->min_alloc_buckets_order) { + unsigned long i, len = 1UL << (order - 1 - ht->min_alloc_buckets_order); + + for (i = len; i < 2 * len; i++) { + ht->tbl_chunk[i] = calloc(ht->min_nr_alloc_buckets, + sizeof(struct cds_lfht_node)); + assert(ht->tbl_chunk[i]); + } + } + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +/* + * cds_lfht_free_bucket_table() should be called with decreasing order. + * When cds_lfht_free_bucket_table(0) is called, it means the whole + * lfht is destroyed. 
+ */ +static +void cds_lfht_free_bucket_table(struct cds_lfht *ht, unsigned long order) +{ + if (order == 0) + poison_free(ht->tbl_chunk[0]); + else if (order > ht->min_alloc_buckets_order) { + unsigned long i, len = 1UL << (order - 1 - ht->min_alloc_buckets_order); + + for (i = len; i < 2 * len; i++) + poison_free(ht->tbl_chunk[i]); + } + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +static +struct cds_lfht_node *bucket_at(struct cds_lfht *ht, unsigned long index) +{ + unsigned long chunk, offset; + + chunk = index >> ht->min_alloc_buckets_order; + offset = index & (ht->min_nr_alloc_buckets - 1); + return &ht->tbl_chunk[chunk][offset]; +} + +static +struct cds_lfht *alloc_cds_lfht(unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets) +{ + unsigned long nr_chunks, cds_lfht_size; + + min_nr_alloc_buckets = max(min_nr_alloc_buckets, + max_nr_buckets / MAX_CHUNK_TABLE); + nr_chunks = max_nr_buckets / min_nr_alloc_buckets; + cds_lfht_size = offsetof(struct cds_lfht, tbl_chunk) + + sizeof(struct cds_lfht_node *) * nr_chunks; + cds_lfht_size = max(cds_lfht_size, sizeof(struct cds_lfht)); + + return __default_alloc_cds_lfht( + &cds_lfht_mm_chunk, cds_lfht_size, + min_nr_alloc_buckets, max_nr_buckets); +} + +const struct cds_lfht_mm_type cds_lfht_mm_chunk = { + .alloc_cds_lfht = alloc_cds_lfht, + .alloc_bucket_table = cds_lfht_alloc_bucket_table, + .free_bucket_table = cds_lfht_free_bucket_table, + .bucket_at = bucket_at, +}; diff --git a/liblttng-ht/rculfhash-mm-mmap.c b/liblttng-ht/rculfhash-mm-mmap.c new file mode 100644 index 000000000..4554ed600 --- /dev/null +++ b/liblttng-ht/rculfhash-mm-mmap.c @@ -0,0 +1,156 @@ +/* + * rculfhash-mm-mmap.c + * + * mmap/reservation based memory management for Lock-Free RCU Hash Table + * + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include "rculfhash-internal.h" + +/* reserve inaccessible memory space without allocation any memory */ +static void *memory_map(size_t length) +{ + void *ret = mmap(NULL, length, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + assert(ret != MAP_FAILED); + return ret; +} + +static void memory_unmap(void *ptr, size_t length) +{ + int ret __attribute__((unused)); + + ret = munmap(ptr, length); + + assert(ret == 0); +} + +static void memory_populate(void *ptr, size_t length) +{ + void *ret __attribute__((unused)); + + ret = mmap(ptr, length, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + assert(ret == ptr); +} + +/* + * Discard garbage memory and avoid system save it when try to swap it out. + * Make it still reserved, inaccessible. 
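The mmap scheme above reserves the whole bucket address range with PROT_NONE up front and only commits pages as bucket orders are allocated. A standalone illustration of that reserve-then-populate pattern follows; the sizes are arbitrary example values and the program is independent of the library.

#include <assert.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t reserve_len = 1UL << 20;		/* 1 MiB of address space */
	size_t commit_len = getpagesize();	/* commit the first page only */
	void *base, *ret;

	/* Reservation: no access rights, nothing committed yet. */
	base = mmap(NULL, reserve_len, PROT_NONE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(base != MAP_FAILED);

	/* Commit the beginning of the range in place. */
	ret = mmap(base, commit_len, PROT_READ | PROT_WRITE,
			MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(ret == base);
	memset(base, 0, commit_len);

	munmap(base, reserve_len);
	return 0;
}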
+ */ +static void memory_discard(void *ptr, size_t length) +{ + void *ret __attribute__((unused)); + + ret = mmap(ptr, length, PROT_NONE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + assert(ret == ptr); +} + +static +void cds_lfht_alloc_bucket_table(struct cds_lfht *ht, unsigned long order) +{ + if (order == 0) { + if (ht->min_nr_alloc_buckets == ht->max_nr_buckets) { + /* small table */ + ht->tbl_mmap = calloc(ht->max_nr_buckets, + sizeof(*ht->tbl_mmap)); + assert(ht->tbl_mmap); + return; + } + /* large table */ + ht->tbl_mmap = memory_map(ht->max_nr_buckets + * sizeof(*ht->tbl_mmap)); + memory_populate(ht->tbl_mmap, + ht->min_nr_alloc_buckets * sizeof(*ht->tbl_mmap)); + } else if (order > ht->min_alloc_buckets_order) { + /* large table */ + unsigned long len = 1UL << (order - 1); + + assert(ht->min_nr_alloc_buckets < ht->max_nr_buckets); + memory_populate(ht->tbl_mmap + len, + len * sizeof(*ht->tbl_mmap)); + } + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +/* + * cds_lfht_free_bucket_table() should be called with decreasing order. + * When cds_lfht_free_bucket_table(0) is called, it means the whole + * lfht is destroyed. + */ +static +void cds_lfht_free_bucket_table(struct cds_lfht *ht, unsigned long order) +{ + if (order == 0) { + if (ht->min_nr_alloc_buckets == ht->max_nr_buckets) { + /* small table */ + poison_free(ht->tbl_mmap); + return; + } + /* large table */ + memory_unmap(ht->tbl_mmap, + ht->max_nr_buckets * sizeof(*ht->tbl_mmap)); + } else if (order > ht->min_alloc_buckets_order) { + /* large table */ + unsigned long len = 1UL << (order - 1); + + assert(ht->min_nr_alloc_buckets < ht->max_nr_buckets); + memory_discard(ht->tbl_mmap + len, len * sizeof(*ht->tbl_mmap)); + } + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +static +struct cds_lfht_node *bucket_at(struct cds_lfht *ht, unsigned long index) +{ + return &ht->tbl_mmap[index]; +} + +static +struct cds_lfht *alloc_cds_lfht(unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets) +{ + unsigned long page_bucket_size; + + page_bucket_size = getpagesize() / sizeof(struct cds_lfht_node); + if (max_nr_buckets <= page_bucket_size) { + /* small table */ + min_nr_alloc_buckets = max_nr_buckets; + } else { + /* large table */ + min_nr_alloc_buckets = max(min_nr_alloc_buckets, + page_bucket_size); + } + + return __default_alloc_cds_lfht( + &cds_lfht_mm_mmap, sizeof(struct cds_lfht), + min_nr_alloc_buckets, max_nr_buckets); +} + +const struct cds_lfht_mm_type cds_lfht_mm_mmap = { + .alloc_cds_lfht = alloc_cds_lfht, + .alloc_bucket_table = cds_lfht_alloc_bucket_table, + .free_bucket_table = cds_lfht_free_bucket_table, + .bucket_at = bucket_at, +}; diff --git a/liblttng-ht/rculfhash-mm-order.c b/liblttng-ht/rculfhash-mm-order.c new file mode 100644 index 000000000..6e3d29bb3 --- /dev/null +++ b/liblttng-ht/rculfhash-mm-order.c @@ -0,0 +1,90 @@ +/* + * rculfhash-mm-order.c + * + * Order based memory management for Lock-Free RCU Hash Table + * + * Copyright 2011 - Mathieu Desnoyers + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "rculfhash-internal.h" + +static +void cds_lfht_alloc_bucket_table(struct cds_lfht *ht, unsigned long order) +{ + if (order == 0) { + ht->tbl_order[0] = calloc(ht->min_nr_alloc_buckets, + sizeof(struct cds_lfht_node)); + assert(ht->tbl_order[0]); + } else if (order > ht->min_alloc_buckets_order) { + ht->tbl_order[order] = calloc(1UL << (order -1), + sizeof(struct cds_lfht_node)); + assert(ht->tbl_order[order]); + } + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +/* + * cds_lfht_free_bucket_table() should be called with decreasing order. + * When cds_lfht_free_bucket_table(0) is called, it means the whole + * lfht is destroyed. + */ +static +void cds_lfht_free_bucket_table(struct cds_lfht *ht, unsigned long order) +{ + if (order == 0) + poison_free(ht->tbl_order[0]); + else if (order > ht->min_alloc_buckets_order) + poison_free(ht->tbl_order[order]); + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +static +struct cds_lfht_node *bucket_at(struct cds_lfht *ht, unsigned long index) +{ + unsigned long order; + + if (index < ht->min_nr_alloc_buckets) { + dbg_printf("bucket index %lu order 0 aridx 0\n", index); + return &ht->tbl_order[0][index]; + } + /* + * equivalent to cds_lfht_get_count_order_ulong(index + 1), but + * optimizes away the non-existing 0 special-case for + * cds_lfht_get_count_order_ulong. + */ + order = cds_lfht_fls_ulong(index); + dbg_printf("bucket index %lu order %lu aridx %lu\n", + index, order, index & ((1UL << (order - 1)) - 1)); + return &ht->tbl_order[order][index & ((1UL << (order - 1)) - 1)]; +} + +static +struct cds_lfht *alloc_cds_lfht(unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets) +{ + return __default_alloc_cds_lfht( + &cds_lfht_mm_order, sizeof(struct cds_lfht), + min_nr_alloc_buckets, max_nr_buckets); +} + +const struct cds_lfht_mm_type cds_lfht_mm_order = { + .alloc_cds_lfht = alloc_cds_lfht, + .alloc_bucket_table = cds_lfht_alloc_bucket_table, + .free_bucket_table = cds_lfht_free_bucket_table, + .bucket_at = bucket_at, +}; diff --git a/liblttng-ht/rculfhash.c b/liblttng-ht/rculfhash.c new file mode 100644 index 000000000..840de351b --- /dev/null +++ b/liblttng-ht/rculfhash.c @@ -0,0 +1,1832 @@ +/* + * rculfhash.c + * + * Userspace RCU library - Lock-Free Resizable RCU Hash Table + * + * Copyright 2010-2011 - Mathieu Desnoyers + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * Based on the following articles: + * - Ori Shalev and Nir Shavit. Split-ordered lists: Lock-free + * extensible hash tables. J. ACM 53, 3 (May 2006), 379-405. + * - Michael, M. M. High performance dynamic lock-free hash tables + * and list-based sets. In Proceedings of the fourteenth annual ACM + * symposium on Parallel algorithms and architectures, ACM Press, + * (2002), 73-82. + * + * Some specificities of this Lock-Free Resizable RCU Hash Table + * implementation: + * + * - RCU read-side critical section allows readers to perform hash + * table lookups and use the returned objects safely by delaying + * memory reclaim of a grace period. + * - Add and remove operations are lock-free, and do not need to + * allocate memory. They need to be executed within RCU read-side + * critical section to ensure the objects they read are valid and to + * deal with the cmpxchg ABA problem. + * - add and add_unique operations are supported. add_unique checks if + * the node key already exists in the hash table. It ensures no key + * duplicata exists. + * - The resize operation executes concurrently with add/remove/lookup. + * - Hash table nodes are contained within a split-ordered list. This + * list is ordered by incrementing reversed-bits-hash value. + * - An index of bucket nodes is kept. These bucket nodes are the hash + * table "buckets", and they are also chained together in the + * split-ordered list, which allows recursive expansion. + * - The resize operation for small tables only allows expanding the hash table. + * It is triggered automatically by detecting long chains in the add + * operation. + * - The resize operation for larger tables (and available through an + * API) allows both expanding and shrinking the hash table. + * - Split-counters are used to keep track of the number of + * nodes within the hash table for automatic resize triggering. + * - Resize operation initiated by long chain detection is executed by a + * call_rcu thread, which keeps lock-freedom of add and remove. + * - Resize operations are protected by a mutex. + * - The removal operation is split in two parts: first, a "removed" + * flag is set in the next pointer within the node to remove. Then, + * a "garbage collection" is performed in the bucket containing the + * removed node (from the start of the bucket up to the removed node). + * All encountered nodes with "removed" flag set in their next + * pointers are removed from the linked-list. If the cmpxchg used for + * removal fails (due to concurrent garbage-collection or concurrent + * add), we retry from the beginning of the bucket. This ensures that + * the node with "removed" flag set is removed from the hash table + * (not visible to lookups anymore) before the RCU read-side critical + * section held across removal ends. Furthermore, this ensures that + * the node with "removed" flag set is removed from the linked-list + * before its memory is reclaimed. Only the thread which removal + * successfully set the "removed" flag (with a cmpxchg) into a node's + * next pointer is considered to have succeeded its removal (and thus + * owns the node to reclaim). 
Because we garbage-collect starting from + * an invariant node (the start-of-bucket bucket node) up to the + * "removed" node (or find a reverse-hash that is higher), we are sure + * that a successful traversal of the chain leads to a chain that is + * present in the linked-list (the start node is never removed) and + * that is does not contain the "removed" node anymore, even if + * concurrent delete/add operations are changing the structure of the + * list concurrently. + * - The add operation performs gargage collection of buckets if it + * encounters nodes with removed flag set in the bucket where it wants + * to add its new node. This ensures lock-freedom of add operation by + * helping the remover unlink nodes from the list rather than to wait + * for it do to so. + * - A RCU "order table" indexed by log2(hash index) is copied and + * expanded by the resize operation. This order table allows finding + * the "bucket node" tables. + * - There is one bucket node table per hash index order. The size of + * each bucket node table is half the number of hashes contained in + * this order (except for order 0). + * - synchronzie_rcu is used to garbage-collect the old bucket node table. + * - The per-order bucket node tables contain a compact version of the + * hash table nodes. These tables are invariant after they are + * populated into the hash table. + * + * Bucket node tables: + * + * hash table hash table the last all bucket node tables + * order size bucket node 0 1 2 3 4 5 6(index) + * table size + * 0 1 1 1 + * 1 2 1 1 1 + * 2 4 2 1 1 2 + * 3 8 4 1 1 2 4 + * 4 16 8 1 1 2 4 8 + * 5 32 16 1 1 2 4 8 16 + * 6 64 32 1 1 2 4 8 16 32 + * + * When growing/shrinking, we only focus on the last bucket node table + * which size is (!order ? 1 : (1 << (order -1))). + * + * Example for growing/shrinking: + * grow hash table from order 5 to 6: init the index=6 bucket node table + * shrink hash table from order 6 to 5: fini the index=6 bucket node table + * + * A bit of ascii art explanation: + * + * Order index is the off-by-one compare to the actual power of 2 because + * we use index 0 to deal with the 0 special-case. + * + * This shows the nodes for a small table ordered by reversed bits: + * + * bits reverse + * 0 000 000 + * 4 100 001 + * 2 010 010 + * 6 110 011 + * 1 001 100 + * 5 101 101 + * 3 011 110 + * 7 111 111 + * + * This shows the nodes in order of non-reversed bits, linked by + * reversed-bit order. + * + * order bits reverse + * 0 0 000 000 + * 1 | 1 001 100 <- + * 2 | | 2 010 010 <- | + * | | | 3 011 110 | <- | + * 3 -> | | | 4 100 001 | | + * -> | | 5 101 101 | + * -> | 6 110 011 + * -> 7 111 111 + */ + +#define _LGPL_SOURCE +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include +#include +#include +#include +#include +#include +#include + +#include "rculfhash.h" +#include "rculfhash-internal.h" +#include "urcu-flavor.h" + +/* + * Split-counters lazily update the global counter each 1024 + * addition/removal. It automatically keeps track of resize required. + * We use the bucket length as indicator for need to expand for small + * tables and machines lacking per-cpu data suppport. + */ +#define COUNT_COMMIT_ORDER 10 +#define DEFAULT_SPLIT_COUNT_MASK 0xFUL +#define CHAIN_LEN_TARGET 1 +#define CHAIN_LEN_RESIZE_THRESHOLD 3 + +/* + * Define the minimum table size. + */ +#define MIN_TABLE_ORDER 0 +#define MIN_TABLE_SIZE (1UL << MIN_TABLE_ORDER) + +/* + * Minimum number of bucket nodes to touch per thread to parallelize grow/shrink. 
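
The key property of the split-ordered list described above is that nodes stay sorted by the bit-reversed value of their hash, so doubling the table size only requires inserting new bucket nodes, never relinking existing ones. A short standalone check that reproduces the 3-bit ordering from the table above (0, 4, 2, 6, 1, 5, 3, 7):

#include <stdio.h>
#include <stdlib.h>

/* Reverse the low 3 bits of v. */
static unsigned int reverse3(unsigned int v)
{
	return ((v & 1) << 2) | (v & 2) | ((v >> 2) & 1);
}

static int cmp_by_reversed_bits(const void *a, const void *b)
{
	unsigned int ra = reverse3(*(const unsigned int *) a);
	unsigned int rb = reverse3(*(const unsigned int *) b);

	return (int) ra - (int) rb;
}

int main(void)
{
	unsigned int v[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
	int i;

	qsort(v, 8, sizeof(v[0]), cmp_by_reversed_bits);
	for (i = 0; i < 8; i++)
		printf("%u ", v[i]);	/* prints: 0 4 2 6 1 5 3 7 */
	printf("\n");
	return 0;
}
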
+ */ +#define MIN_PARTITION_PER_THREAD_ORDER 12 +#define MIN_PARTITION_PER_THREAD (1UL << MIN_PARTITION_PER_THREAD_ORDER) + +/* + * The removed flag needs to be updated atomically with the pointer. + * It indicates that no node must attach to the node scheduled for + * removal, and that node garbage collection must be performed. + * The bucket flag does not require to be updated atomically with the + * pointer, but it is added as a pointer low bit flag to save space. + */ +#define REMOVED_FLAG (1UL << 0) +#define BUCKET_FLAG (1UL << 1) +#define REMOVAL_OWNER_FLAG (1UL << 2) +#define FLAGS_MASK ((1UL << 3) - 1) + +/* Value of the end pointer. Should not interact with flags. */ +#define END_VALUE NULL + +DEFINE_RCU_FLAVOR(rcu_flavor); + +/* + * ht_items_count: Split-counters counting the number of node addition + * and removal in the table. Only used if the CDS_LFHT_ACCOUNTING flag + * is set at hash table creation. + * + * These are free-running counters, never reset to zero. They count the + * number of add/remove, and trigger every (1 << COUNT_COMMIT_ORDER) + * operations to update the global counter. We choose a power-of-2 value + * for the trigger to deal with 32 or 64-bit overflow of the counter. + */ +struct ht_items_count { + unsigned long add, del; +} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); + +/* + * rcu_resize_work: Contains arguments passed to RCU worker thread + * responsible for performing lazy resize. + */ +struct rcu_resize_work { + struct rcu_head head; + struct cds_lfht *ht; +}; + +/* + * partition_resize_work: Contains arguments passed to worker threads + * executing the hash table resize on partitions of the hash table + * assigned to each processor's worker thread. + */ +struct partition_resize_work { + pthread_t thread_id; + struct cds_lfht *ht; + unsigned long i, start, len; + void (*fct)(struct cds_lfht *ht, unsigned long i, + unsigned long start, unsigned long len); +}; + +/* + * Algorithm to reverse bits in a word by lookup table, extended to + * 64-bit words. + * Source: + * http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable + * Originally from Public Domain. + */ + +static const uint8_t BitReverseTable256[256] = +{ +#define R2(n) (n), (n) + 2*64, (n) + 1*64, (n) + 3*64 +#define R4(n) R2(n), R2((n) + 2*16), R2((n) + 1*16), R2((n) + 3*16) +#define R6(n) R4(n), R4((n) + 2*4 ), R4((n) + 1*4 ), R4((n) + 3*4 ) + R6(0), R6(2), R6(1), R6(3) +}; +#undef R2 +#undef R4 +#undef R6 + +static +uint8_t bit_reverse_u8(uint8_t v) +{ + return BitReverseTable256[v]; +} + +static __attribute__((unused)) +uint32_t bit_reverse_u32(uint32_t v) +{ + return ((uint32_t) bit_reverse_u8(v) << 24) | + ((uint32_t) bit_reverse_u8(v >> 8) << 16) | + ((uint32_t) bit_reverse_u8(v >> 16) << 8) | + ((uint32_t) bit_reverse_u8(v >> 24)); +} + +static __attribute__((unused)) +uint64_t bit_reverse_u64(uint64_t v) +{ + return ((uint64_t) bit_reverse_u8(v) << 56) | + ((uint64_t) bit_reverse_u8(v >> 8) << 48) | + ((uint64_t) bit_reverse_u8(v >> 16) << 40) | + ((uint64_t) bit_reverse_u8(v >> 24) << 32) | + ((uint64_t) bit_reverse_u8(v >> 32) << 24) | + ((uint64_t) bit_reverse_u8(v >> 40) << 16) | + ((uint64_t) bit_reverse_u8(v >> 48) << 8) | + ((uint64_t) bit_reverse_u8(v >> 56)); +} + +static +unsigned long bit_reverse_ulong(unsigned long v) +{ +#if (CAA_BITS_PER_LONG == 32) + return bit_reverse_u32(v); +#else + return bit_reverse_u64(v); +#endif +} + +/* + * fls: returns the position of the most significant bit. 
+ * Returns 0 if no bit is set, else returns the position of the most + * significant bit (from 1 to 32 on 32-bit, from 1 to 64 on 64-bit). + */ +#if defined(__i386) || defined(__x86_64) +static inline +unsigned int fls_u32(uint32_t x) +{ + int r; + + asm("bsrl %1,%0\n\t" + "jnz 1f\n\t" + "movl $-1,%0\n\t" + "1:\n\t" + : "=r" (r) : "rm" (x)); + return r + 1; +} +#define HAS_FLS_U32 +#endif + +#if defined(__x86_64) +static inline +unsigned int fls_u64(uint64_t x) +{ + long r; + + asm("bsrq %1,%0\n\t" + "jnz 1f\n\t" + "movq $-1,%0\n\t" + "1:\n\t" + : "=r" (r) : "rm" (x)); + return r + 1; +} +#define HAS_FLS_U64 +#endif + +#ifndef HAS_FLS_U64 +static __attribute__((unused)) +unsigned int fls_u64(uint64_t x) +{ + unsigned int r = 64; + + if (!x) + return 0; + + if (!(x & 0xFFFFFFFF00000000ULL)) { + x <<= 32; + r -= 32; + } + if (!(x & 0xFFFF000000000000ULL)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xFF00000000000000ULL)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xF000000000000000ULL)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xC000000000000000ULL)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x8000000000000000ULL)) { + x <<= 1; + r -= 1; + } + return r; +} +#endif + +#ifndef HAS_FLS_U32 +static __attribute__((unused)) +unsigned int fls_u32(uint32_t x) +{ + unsigned int r = 32; + + if (!x) + return 0; + if (!(x & 0xFFFF0000U)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xFF000000U)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xF0000000U)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xC0000000U)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000U)) { + x <<= 1; + r -= 1; + } + return r; +} +#endif + +unsigned int cds_lfht_fls_ulong(unsigned long x) +{ +#if (CAA_BITS_PER_LONG == 32) + return fls_u32(x); +#else + return fls_u64(x); +#endif +} + +/* + * Return the minimum order for which x <= (1UL << order). + * Return -1 if x is 0. + */ +int cds_lfht_get_count_order_u32(uint32_t x) +{ + if (!x) + return -1; + + return fls_u32(x - 1); +} + +/* + * Return the minimum order for which x <= (1UL << order). + * Return -1 if x is 0. + */ +int cds_lfht_get_count_order_ulong(unsigned long x) +{ + if (!x) + return -1; + + return cds_lfht_fls_ulong(x - 1); +} + +static +void cds_lfht_resize_lazy_grow(struct cds_lfht *ht, unsigned long size, int growth); + +static +void cds_lfht_resize_lazy_count(struct cds_lfht *ht, unsigned long size, + unsigned long count); + +static long nr_cpus_mask = -1; +static long split_count_mask = -1; + +#if defined(HAVE_SYSCONF) +static void ht_init_nr_cpus_mask(void) +{ + long maxcpus; + + maxcpus = sysconf(_SC_NPROCESSORS_CONF); + if (maxcpus <= 0) { + nr_cpus_mask = -2; + return; + } + /* + * round up number of CPUs to next power of two, so we + * can use & for modulo. 
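
cds_lfht_get_count_order_ulong(x) returns the smallest order such that x <= (1UL << order); rounding the CPU count up to a power of two this way is what makes "hash & nr_cpus_mask" usable as a cheap modulo. A small worked example of that round-up (count_order() below mirrors the semantics, it is not the library function):

#include <assert.h>

/* Smallest order such that x <= (1UL << order); -1 for x == 0. */
static int count_order(unsigned long x)
{
	int order = 0;

	if (!x)
		return -1;
	while ((1UL << order) < x)
		order++;
	return order;
}

int main(void)
{
	unsigned long maxcpus = 6;	/* e.g. sysconf() reported 6 CPUs */
	unsigned long mask;

	maxcpus = 1UL << count_order(maxcpus);	/* rounded up to 8 */
	mask = maxcpus - 1;			/* 0x7 */

	assert(maxcpus == 8 && mask == 7);
	assert((13UL & mask) == 13 % maxcpus);	/* '&' acts as modulo */
	return 0;
}
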
+ */ + maxcpus = 1UL << cds_lfht_get_count_order_ulong(maxcpus); + nr_cpus_mask = maxcpus - 1; +} +#else /* #if defined(HAVE_SYSCONF) */ +static void ht_init_nr_cpus_mask(void) +{ + nr_cpus_mask = -2; +} +#endif /* #else #if defined(HAVE_SYSCONF) */ + +static +void alloc_split_items_count(struct cds_lfht *ht) +{ + struct ht_items_count *count; + + if (nr_cpus_mask == -1) { + ht_init_nr_cpus_mask(); + if (nr_cpus_mask < 0) + split_count_mask = DEFAULT_SPLIT_COUNT_MASK; + else + split_count_mask = nr_cpus_mask; + } + + assert(split_count_mask >= 0); + + if (ht->flags & CDS_LFHT_ACCOUNTING) { + ht->split_count = calloc(split_count_mask + 1, sizeof(*count)); + assert(ht->split_count); + } else { + ht->split_count = NULL; + } +} + +static +void free_split_items_count(struct cds_lfht *ht) +{ + poison_free(ht->split_count); +} + +#if defined(HAVE_SCHED_GETCPU) +static +int ht_get_split_count_index(unsigned long hash) +{ + int cpu; + + assert(split_count_mask >= 0); + cpu = sched_getcpu(); + if (caa_unlikely(cpu < 0)) + return hash & split_count_mask; + else + return cpu & split_count_mask; +} +#else /* #if defined(HAVE_SCHED_GETCPU) */ +static +int ht_get_split_count_index(unsigned long hash) +{ + return hash & split_count_mask; +} +#endif /* #else #if defined(HAVE_SCHED_GETCPU) */ + +static +void ht_count_add(struct cds_lfht *ht, unsigned long size, unsigned long hash) +{ + unsigned long split_count; + int index; + long count; + + if (caa_unlikely(!ht->split_count)) + return; + index = ht_get_split_count_index(hash); + split_count = uatomic_add_return(&ht->split_count[index].add, 1); + if (caa_likely(split_count & ((1UL << COUNT_COMMIT_ORDER) - 1))) + return; + /* Only if number of add multiple of 1UL << COUNT_COMMIT_ORDER */ + + dbg_printf("add split count %lu\n", split_count); + count = uatomic_add_return(&ht->count, + 1UL << COUNT_COMMIT_ORDER); + if (caa_likely(count & (count - 1))) + return; + /* Only if global count is power of 2 */ + + if ((count >> CHAIN_LEN_RESIZE_THRESHOLD) < size) + return; + dbg_printf("add set global %ld\n", count); + cds_lfht_resize_lazy_count(ht, size, + count >> (CHAIN_LEN_TARGET - 1)); +} + +static +void ht_count_del(struct cds_lfht *ht, unsigned long size, unsigned long hash) +{ + unsigned long split_count; + int index; + long count; + + if (caa_unlikely(!ht->split_count)) + return; + index = ht_get_split_count_index(hash); + split_count = uatomic_add_return(&ht->split_count[index].del, 1); + if (caa_likely(split_count & ((1UL << COUNT_COMMIT_ORDER) - 1))) + return; + /* Only if number of deletes multiple of 1UL << COUNT_COMMIT_ORDER */ + + dbg_printf("del split count %lu\n", split_count); + count = uatomic_add_return(&ht->count, + -(1UL << COUNT_COMMIT_ORDER)); + if (caa_likely(count & (count - 1))) + return; + /* Only if global count is power of 2 */ + + if ((count >> CHAIN_LEN_RESIZE_THRESHOLD) >= size) + return; + dbg_printf("del set global %ld\n", count); + /* + * Don't shrink table if the number of nodes is below a + * certain threshold. + */ + if (count < (1UL << COUNT_COMMIT_ORDER) * (split_count_mask + 1)) + return; + cds_lfht_resize_lazy_count(ht, size, + count >> (CHAIN_LEN_TARGET - 1)); +} + +static +void check_resize(struct cds_lfht *ht, unsigned long size, uint32_t chain_len) +{ + unsigned long count; + + if (!(ht->flags & CDS_LFHT_AUTO_RESIZE)) + return; + count = uatomic_read(&ht->count); + /* + * Use bucket-local length for small table expand and for + * environments lacking per-cpu data support. 
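
ht_count_add() and ht_count_del() above keep free-running per-slot counters and only fold a full batch of 2^COUNT_COMMIT_ORDER operations into the shared ht->count, which keeps the hot add/remove paths from contending on a single cache line. A simplified, single-threaded sketch of that commit trigger (no atomics, purely illustrative):

#include <stdio.h>

#define COUNT_COMMIT_ORDER	10

static unsigned long split_add;		/* per-slot, free-running */
static unsigned long global_count;	/* shared, updated in batches */

static void count_add(void)
{
	split_add++;
	/* Commit only when the low COUNT_COMMIT_ORDER bits wrap to 0. */
	if (split_add & ((1UL << COUNT_COMMIT_ORDER) - 1))
		return;
	global_count += 1UL << COUNT_COMMIT_ORDER;
	printf("committed a batch, global count ~ %lu\n", global_count);
}

int main(void)
{
	unsigned long i;

	for (i = 0; i < 3000; i++)
		count_add();	/* commits at 1024 and at 2048 */
	return 0;
}
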
+ */ + if (count >= (1UL << COUNT_COMMIT_ORDER)) + return; + if (chain_len > 100) + dbg_printf("WARNING: large chain length: %u.\n", + chain_len); + if (chain_len >= CHAIN_LEN_RESIZE_THRESHOLD) + cds_lfht_resize_lazy_grow(ht, size, + cds_lfht_get_count_order_u32(chain_len - (CHAIN_LEN_TARGET - 1))); +} + +static +struct cds_lfht_node *clear_flag(struct cds_lfht_node *node) +{ + return (struct cds_lfht_node *) (((unsigned long) node) & ~FLAGS_MASK); +} + +static +int is_removed(struct cds_lfht_node *node) +{ + return ((unsigned long) node) & REMOVED_FLAG; +} + +static +struct cds_lfht_node *flag_removed(struct cds_lfht_node *node) +{ + return (struct cds_lfht_node *) (((unsigned long) node) | REMOVED_FLAG); +} + +static +int is_bucket(struct cds_lfht_node *node) +{ + return ((unsigned long) node) & BUCKET_FLAG; +} + +static +struct cds_lfht_node *flag_bucket(struct cds_lfht_node *node) +{ + return (struct cds_lfht_node *) (((unsigned long) node) | BUCKET_FLAG); +} + +static +int is_removal_owner(struct cds_lfht_node *node) +{ + return ((unsigned long) node) & REMOVAL_OWNER_FLAG; +} + +static +struct cds_lfht_node *flag_removal_owner(struct cds_lfht_node *node) +{ + return (struct cds_lfht_node *) (((unsigned long) node) | REMOVAL_OWNER_FLAG); +} + +static +struct cds_lfht_node *get_end(void) +{ + return (struct cds_lfht_node *) END_VALUE; +} + +static +int is_end(struct cds_lfht_node *node) +{ + return clear_flag(node) == (struct cds_lfht_node *) END_VALUE; +} + +static +unsigned long _uatomic_xchg_monotonic_increase(unsigned long *ptr, + unsigned long v) +{ + unsigned long old1, old2; + + old1 = uatomic_read(ptr); + do { + old2 = old1; + if (old2 >= v) + return old2; + } while ((old1 = uatomic_cmpxchg(ptr, old2, v)) != old2); + return old2; +} + +static +void cds_lfht_alloc_bucket_table(struct cds_lfht *ht, unsigned long order) +{ + return ht->mm->alloc_bucket_table(ht, order); +} + +/* + * cds_lfht_free_bucket_table() should be called with decreasing order. + * When cds_lfht_free_bucket_table(0) is called, it means the whole + * lfht is destroyed. + */ +static +void cds_lfht_free_bucket_table(struct cds_lfht *ht, unsigned long order) +{ + return ht->mm->free_bucket_table(ht, order); +} + +static inline +struct cds_lfht_node *bucket_at(struct cds_lfht *ht, unsigned long index) +{ + return ht->bucket_at(ht, index); +} + +static inline +struct cds_lfht_node *lookup_bucket(struct cds_lfht *ht, unsigned long size, + unsigned long hash) +{ + assert(size > 0); + return bucket_at(ht, hash & (size - 1)); +} + +/* + * Remove all logically deleted nodes from a bucket up to a certain node key. + */ +static +void _cds_lfht_gc_bucket(struct cds_lfht_node *bucket, struct cds_lfht_node *node) +{ + struct cds_lfht_node *iter_prev, *iter, *next, *new_next; + + assert(!is_bucket(bucket)); + assert(!is_removed(bucket)); + assert(!is_bucket(node)); + assert(!is_removed(node)); + for (;;) { + iter_prev = bucket; + /* We can always skip the bucket node initially */ + iter = rcu_dereference(iter_prev->next); + assert(!is_removed(iter)); + assert(iter_prev->reverse_hash <= node->reverse_hash); + /* + * We should never be called with bucket (start of chain) + * and logically removed node (end of path compression + * marker) being the actual same node. This would be a + * bug in the algorithm implementation. 
+ */ + assert(bucket != node); + for (;;) { + if (caa_unlikely(is_end(iter))) + return; + if (caa_likely(clear_flag(iter)->reverse_hash > node->reverse_hash)) + return; + next = rcu_dereference(clear_flag(iter)->next); + if (caa_likely(is_removed(next))) + break; + iter_prev = clear_flag(iter); + iter = next; + } + assert(!is_removed(iter)); + if (is_bucket(iter)) + new_next = flag_bucket(clear_flag(next)); + else + new_next = clear_flag(next); + (void) uatomic_cmpxchg(&iter_prev->next, iter, new_next); + } +} + +static +int _cds_lfht_replace(struct cds_lfht *ht, unsigned long size, + struct cds_lfht_node *old_node, + struct cds_lfht_node *old_next, + struct cds_lfht_node *new_node) +{ + struct cds_lfht_node *bucket, *ret_next; + + if (!old_node) /* Return -ENOENT if asked to replace NULL node */ + return -ENOENT; + + assert(!is_removed(old_node)); + assert(!is_bucket(old_node)); + assert(!is_removed(new_node)); + assert(!is_bucket(new_node)); + assert(new_node != old_node); + for (;;) { + /* Insert after node to be replaced */ + if (is_removed(old_next)) { + /* + * Too late, the old node has been removed under us + * between lookup and replace. Fail. + */ + return -ENOENT; + } + assert(old_next == clear_flag(old_next)); + assert(new_node != old_next); + new_node->next = old_next; + /* + * Here is the whole trick for lock-free replace: we add + * the replacement node _after_ the node we want to + * replace by atomically setting its next pointer at the + * same time we set its removal flag. Given that + * the lookups/get next use an iterator aware of the + * next pointer, they will either skip the old node due + * to the removal flag and see the new node, or use + * the old node, but will not see the new one. + * This is a replacement of a node with another node + * that has the same value: we are therefore not + * removing a value from the hash table. + */ + ret_next = uatomic_cmpxchg(&old_node->next, + old_next, flag_removed(new_node)); + if (ret_next == old_next) + break; /* We performed the replacement. */ + old_next = ret_next; + } + + /* + * Ensure that the old node is not visible to readers anymore: + * lookup for the node, and remove it (along with any other + * logically removed node) if found. + */ + bucket = lookup_bucket(ht, size, bit_reverse_ulong(old_node->reverse_hash)); + _cds_lfht_gc_bucket(bucket, new_node); + + assert(is_removed(rcu_dereference(old_node->next))); + return 0; +} + +/* + * A non-NULL unique_ret pointer uses the "add unique" (or uniquify) add + * mode. A NULL unique_ret allows creation of duplicate keys. + */ +static +void _cds_lfht_add(struct cds_lfht *ht, + unsigned long hash, + cds_lfht_match_fct match, + const void *key, + unsigned long size, + struct cds_lfht_node *node, + struct cds_lfht_iter *unique_ret, + int bucket_flag) +{ + struct cds_lfht_node *iter_prev, *iter, *next, *new_node, *new_next, + *return_node; + struct cds_lfht_node *bucket; + + assert(!is_bucket(node)); + assert(!is_removed(node)); + bucket = lookup_bucket(ht, size, hash); + for (;;) { + uint32_t chain_len = 0; + + /* + * iter_prev points to the non-removed node prior to the + * insert location. 
+ */ + iter_prev = bucket; + /* We can always skip the bucket node initially */ + iter = rcu_dereference(iter_prev->next); + assert(iter_prev->reverse_hash <= node->reverse_hash); + for (;;) { + if (caa_unlikely(is_end(iter))) + goto insert; + if (caa_likely(clear_flag(iter)->reverse_hash > node->reverse_hash)) + goto insert; + + /* bucket node is the first node of the identical-hash-value chain */ + if (bucket_flag && clear_flag(iter)->reverse_hash == node->reverse_hash) + goto insert; + + next = rcu_dereference(clear_flag(iter)->next); + if (caa_unlikely(is_removed(next))) + goto gc_node; + + /* uniquely add */ + if (unique_ret + && !is_bucket(next) + && clear_flag(iter)->reverse_hash == node->reverse_hash) { + struct cds_lfht_iter d_iter = { .node = node, .next = iter, }; + + /* + * uniquely adding inserts the node as the first + * node of the identical-hash-value node chain. + * + * This semantic ensures no duplicated keys + * should ever be observable in the table + * (including observe one node by one node + * by forward iterations) + */ + cds_lfht_next_duplicate(ht, match, key, &d_iter); + if (!d_iter.node) + goto insert; + + *unique_ret = d_iter; + return; + } + + /* Only account for identical reverse hash once */ + if (iter_prev->reverse_hash != clear_flag(iter)->reverse_hash + && !is_bucket(next)) + check_resize(ht, size, ++chain_len); + iter_prev = clear_flag(iter); + iter = next; + } + + insert: + assert(node != clear_flag(iter)); + assert(!is_removed(iter_prev)); + assert(!is_removed(iter)); + assert(iter_prev != node); + if (!bucket_flag) + node->next = clear_flag(iter); + else + node->next = flag_bucket(clear_flag(iter)); + if (is_bucket(iter)) + new_node = flag_bucket(node); + else + new_node = node; + if (uatomic_cmpxchg(&iter_prev->next, iter, + new_node) != iter) { + continue; /* retry */ + } else { + return_node = node; + goto end; + } + + gc_node: + assert(!is_removed(iter)); + if (is_bucket(iter)) + new_next = flag_bucket(clear_flag(next)); + else + new_next = clear_flag(next); + (void) uatomic_cmpxchg(&iter_prev->next, iter, new_next); + /* retry */ + } +end: + if (unique_ret) { + unique_ret->node = return_node; + /* unique_ret->next left unset, never used. */ + } +} + +static +int _cds_lfht_del(struct cds_lfht *ht, unsigned long size, + struct cds_lfht_node *node) +{ + struct cds_lfht_node *bucket, *next; + + if (!node) /* Return -ENOENT if asked to delete NULL node */ + return -ENOENT; + + /* logically delete the node */ + assert(!is_bucket(node)); + assert(!is_removed(node)); + assert(!is_removal_owner(node)); + + /* + * We are first checking if the node had previously been + * logically removed (this check is not atomic with setting the + * logical removal flag). Return -ENOENT if the node had + * previously been removed. + */ + next = rcu_dereference(node->next); + if (caa_unlikely(is_removed(next))) + return -ENOENT; + assert(!is_bucket(next)); + /* + * We set the REMOVED_FLAG unconditionally. Note that there may + * be more than one concurrent thread setting this flag. + * Knowing which wins the race will be known after the garbage + * collection phase, stay tuned! + */ + uatomic_or(&node->next, REMOVED_FLAG); + /* We performed the (logical) deletion. */ + + /* + * Ensure that the node is not visible to readers anymore: lookup for + * the node, and remove it (along with any other logically removed node) + * if found. 
+ */ + bucket = lookup_bucket(ht, size, bit_reverse_ulong(node->reverse_hash)); + _cds_lfht_gc_bucket(bucket, node); + + assert(is_removed(rcu_dereference(node->next))); + /* + * Last phase: atomically exchange node->next with a version + * having "REMOVAL_OWNER_FLAG" set. If the returned node->next + * pointer did _not_ have "REMOVAL_OWNER_FLAG" set, we now own + * the node and win the removal race. + * It is interesting to note that all "add" paths are forbidden + * to change the next pointer starting from the point where the + * REMOVED_FLAG is set, so here using a read, followed by a + * xchg() suffice to guarantee that the xchg() will ever only + * set the "REMOVAL_OWNER_FLAG" (or change nothing if the flag + * was already set). + */ + if (!is_removal_owner(uatomic_xchg(&node->next, + flag_removal_owner(node->next)))) + return 0; + else + return -ENOENT; +} + +static +void *partition_resize_thread(void *arg) +{ + struct partition_resize_work *work = arg; + + work->ht->flavor->register_thread(); + work->fct(work->ht, work->i, work->start, work->len); + work->ht->flavor->unregister_thread(); + return NULL; +} + +static +void partition_resize_helper(struct cds_lfht *ht, unsigned long i, + unsigned long len, + void (*fct)(struct cds_lfht *ht, unsigned long i, + unsigned long start, unsigned long len)) +{ + unsigned long partition_len; + struct partition_resize_work *work; + int thread, ret; + unsigned long nr_threads; + + /* + * Note: nr_cpus_mask + 1 is always power of 2. + * We spawn just the number of threads we need to satisfy the minimum + * partition size, up to the number of CPUs in the system. + */ + if (nr_cpus_mask > 0) { + nr_threads = min(nr_cpus_mask + 1, + len >> MIN_PARTITION_PER_THREAD_ORDER); + } else { + nr_threads = 1; + } + partition_len = len >> cds_lfht_get_count_order_ulong(nr_threads); + work = calloc(nr_threads, sizeof(*work)); + assert(work); + for (thread = 0; thread < nr_threads; thread++) { + work[thread].ht = ht; + work[thread].i = i; + work[thread].len = partition_len; + work[thread].start = thread * partition_len; + work[thread].fct = fct; + ret = pthread_create(&(work[thread].thread_id), ht->resize_attr, + partition_resize_thread, &work[thread]); + assert(!ret); + } + for (thread = 0; thread < nr_threads; thread++) { + ret = pthread_join(work[thread].thread_id, NULL); + assert(!ret); + } + free(work); +} + +/* + * Holding RCU read lock to protect _cds_lfht_add against memory + * reclaim that could be performed by other call_rcu worker threads (ABA + * problem). + * + * When we reach a certain length, we can split this population phase over + * many worker threads, based on the number of CPUs available in the system. + * This should therefore take care of not having the expand lagging behind too + * many concurrent insertion threads by using the scheduler's ability to + * schedule bucket node population fairly with insertions. 
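
partition_resize_helper() above spawns at most one worker per CPU and never hands a worker fewer than MIN_PARTITION_PER_THREAD buckets. A quick numeric sketch of that split (the bucket count and CPU count are made-up example values):

#include <assert.h>

#define MIN_PARTITION_PER_THREAD_ORDER	12
#define MIN_PARTITION_PER_THREAD	(1UL << MIN_PARTITION_PER_THREAD_ORDER)

int main(void)
{
	unsigned long len = 1UL << 16;		/* 65536 buckets to populate */
	unsigned long nr_cpus = 8;		/* i.e. nr_cpus_mask + 1 */
	unsigned long nr_threads, partition_len;

	nr_threads = len >> MIN_PARTITION_PER_THREAD_ORDER;	/* 16 */
	if (nr_threads > nr_cpus)
		nr_threads = nr_cpus;				/* 8 */
	partition_len = len / nr_threads;			/* 8192 */

	assert(nr_threads == 8 && partition_len == 8192);
	assert(partition_len >= MIN_PARTITION_PER_THREAD);
	return 0;
}
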
+ */ +static +void init_table_populate_partition(struct cds_lfht *ht, unsigned long i, + unsigned long start, unsigned long len) +{ + unsigned long j, size = 1UL << (i - 1); + + assert(i > MIN_TABLE_ORDER); + ht->flavor->read_lock(); + for (j = size + start; j < size + start + len; j++) { + struct cds_lfht_node *new_node = bucket_at(ht, j); + + assert(j >= size && j < (size << 1)); + dbg_printf("init populate: order %lu index %lu hash %lu\n", + i, j, j); + new_node->reverse_hash = bit_reverse_ulong(j); + _cds_lfht_add(ht, j, NULL, NULL, size, new_node, NULL, 1); + } + ht->flavor->read_unlock(); +} + +static +void init_table_populate(struct cds_lfht *ht, unsigned long i, + unsigned long len) +{ + assert(nr_cpus_mask != -1); + if (nr_cpus_mask < 0 || len < 2 * MIN_PARTITION_PER_THREAD) { + ht->flavor->thread_online(); + init_table_populate_partition(ht, i, 0, len); + ht->flavor->thread_offline(); + return; + } + partition_resize_helper(ht, i, len, init_table_populate_partition); +} + +static +void init_table(struct cds_lfht *ht, + unsigned long first_order, unsigned long last_order) +{ + unsigned long i; + + dbg_printf("init table: first_order %lu last_order %lu\n", + first_order, last_order); + assert(first_order > MIN_TABLE_ORDER); + for (i = first_order; i <= last_order; i++) { + unsigned long len; + + len = 1UL << (i - 1); + dbg_printf("init order %lu len: %lu\n", i, len); + + /* Stop expand if the resize target changes under us */ + if (CMM_LOAD_SHARED(ht->resize_target) < (1UL << i)) + break; + + cds_lfht_alloc_bucket_table(ht, i); + + /* + * Set all bucket nodes reverse hash values for a level and + * link all bucket nodes into the table. + */ + init_table_populate(ht, i, len); + + /* + * Update table size. + */ + cmm_smp_wmb(); /* populate data before RCU size */ + CMM_STORE_SHARED(ht->size, 1UL << i); + + dbg_printf("init new size: %lu\n", 1UL << i); + if (CMM_LOAD_SHARED(ht->in_progress_destroy)) + break; + } +} + +/* + * Holding RCU read lock to protect _cds_lfht_remove against memory + * reclaim that could be performed by other call_rcu worker threads (ABA + * problem). + * For a single level, we logically remove and garbage collect each node. + * + * As a design choice, we perform logical removal and garbage collection on a + * node-per-node basis to simplify this algorithm. We also assume keeping good + * cache locality of the operation would overweight possible performance gain + * that could be achieved by batching garbage collection for multiple levels. + * However, this would have to be justified by benchmarks. + * + * Concurrent removal and add operations are helping us perform garbage + * collection of logically removed nodes. We guarantee that all logically + * removed nodes have been garbage-collected (unlinked) before call_rcu is + * invoked to free a hole level of bucket nodes (after a grace period). + * + * Logical removal and garbage collection can therefore be done in batch or on a + * node-per-node basis, as long as the guarantee above holds. + * + * When we reach a certain length, we can split this removal over many worker + * threads, based on the number of CPUs available in the system. This should + * take care of not letting resize process lag behind too many concurrent + * updater threads actively inserting into the hash table. 
+ */ +static +void remove_table_partition(struct cds_lfht *ht, unsigned long i, + unsigned long start, unsigned long len) +{ + unsigned long j, size = 1UL << (i - 1); + + assert(i > MIN_TABLE_ORDER); + ht->flavor->read_lock(); + for (j = size + start; j < size + start + len; j++) { + struct cds_lfht_node *fini_bucket = bucket_at(ht, j); + struct cds_lfht_node *parent_bucket = bucket_at(ht, j - size); + + assert(j >= size && j < (size << 1)); + dbg_printf("remove entry: order %lu index %lu hash %lu\n", + i, j, j); + /* Set the REMOVED_FLAG to freeze the ->next for gc */ + uatomic_or(&fini_bucket->next, REMOVED_FLAG); + _cds_lfht_gc_bucket(parent_bucket, fini_bucket); + } + ht->flavor->read_unlock(); +} + +static +void remove_table(struct cds_lfht *ht, unsigned long i, unsigned long len) +{ + + assert(nr_cpus_mask != -1); + if (nr_cpus_mask < 0 || len < 2 * MIN_PARTITION_PER_THREAD) { + ht->flavor->thread_online(); + remove_table_partition(ht, i, 0, len); + ht->flavor->thread_offline(); + return; + } + partition_resize_helper(ht, i, len, remove_table_partition); +} + +/* + * fini_table() is never called for first_order == 0, which is why + * free_by_rcu_order == 0 can be used as criterion to know if free must + * be called. + */ +static +void fini_table(struct cds_lfht *ht, + unsigned long first_order, unsigned long last_order) +{ + long i; + unsigned long free_by_rcu_order = 0; + + dbg_printf("fini table: first_order %lu last_order %lu\n", + first_order, last_order); + assert(first_order > MIN_TABLE_ORDER); + for (i = last_order; i >= first_order; i--) { + unsigned long len; + + len = 1UL << (i - 1); + dbg_printf("fini order %lu len: %lu\n", i, len); + + /* Stop shrink if the resize target changes under us */ + if (CMM_LOAD_SHARED(ht->resize_target) > (1UL << (i - 1))) + break; + + cmm_smp_wmb(); /* populate data before RCU size */ + CMM_STORE_SHARED(ht->size, 1UL << (i - 1)); + + /* + * We need to wait for all add operations to reach Q.S. (and + * thus use the new table for lookups) before we can start + * releasing the old bucket nodes. Otherwise their lookup will + * return a logically removed node as insert position. + */ + ht->flavor->update_synchronize_rcu(); + if (free_by_rcu_order) + cds_lfht_free_bucket_table(ht, free_by_rcu_order); + + /* + * Set "removed" flag in bucket nodes about to be removed. + * Unlink all now-logically-removed bucket node pointers. + * Concurrent add/remove operation are helping us doing + * the gc. + */ + remove_table(ht, i, len); + + free_by_rcu_order = i; + + dbg_printf("fini new size: %lu\n", 1UL << i); + if (CMM_LOAD_SHARED(ht->in_progress_destroy)) + break; + } + + if (free_by_rcu_order) { + ht->flavor->update_synchronize_rcu(); + cds_lfht_free_bucket_table(ht, free_by_rcu_order); + } +} + +static +void cds_lfht_create_bucket(struct cds_lfht *ht, unsigned long size) +{ + struct cds_lfht_node *prev, *node; + unsigned long order, len, i; + + cds_lfht_alloc_bucket_table(ht, 0); + + dbg_printf("create bucket: order 0 index 0 hash 0\n"); + node = bucket_at(ht, 0); + node->next = flag_bucket(get_end()); + node->reverse_hash = 0; + + for (order = 1; order < cds_lfht_get_count_order_ulong(size) + 1; order++) { + len = 1UL << (order - 1); + cds_lfht_alloc_bucket_table(ht, order); + + for (i = 0; i < len; i++) { + /* + * Now, we are trying to init the node with the + * hash=(len+i) (which is also a bucket with the + * index=(len+i)) and insert it into the hash table, + * so this node has to be inserted after the bucket + * with the index=(len+i)&(len-1)=i. 
And because there + * is no other non-bucket node nor bucket node with + * larger index/hash inserted, so the bucket node + * being inserted should be inserted directly linked + * after the bucket node with index=i. + */ + prev = bucket_at(ht, i); + node = bucket_at(ht, len + i); + + dbg_printf("create bucket: order %lu index %lu hash %lu\n", + order, len + i, len + i); + node->reverse_hash = bit_reverse_ulong(len + i); + + /* insert after prev */ + assert(is_bucket(prev->next)); + node->next = prev->next; + prev->next = flag_bucket(node); + } + } +} + +struct cds_lfht *_cds_lfht_new(unsigned long init_size, + unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets, + int flags, + const struct cds_lfht_mm_type *mm, + const struct rcu_flavor_struct *flavor, + pthread_attr_t *attr) +{ + struct cds_lfht *ht; + unsigned long order; + + /* min_nr_alloc_buckets must be power of two */ + if (!min_nr_alloc_buckets || (min_nr_alloc_buckets & (min_nr_alloc_buckets - 1))) + return NULL; + + /* init_size must be power of two */ + if (!init_size || (init_size & (init_size - 1))) + return NULL; + + /* + * Memory management plugin default. + */ + if (!mm) { + if (CAA_BITS_PER_LONG > 32 + && max_nr_buckets + && max_nr_buckets <= (1ULL << 32)) { + /* + * For 64-bit architectures, with max number of + * buckets small enough not to use the entire + * 64-bit memory mapping space (and allowing a + * fair number of hash table instances), use the + * mmap allocator, which is faster than the + * order allocator. + */ + mm = &cds_lfht_mm_mmap; + } else { + /* + * The fallback is to use the order allocator. + */ + mm = &cds_lfht_mm_order; + } + } + + /* max_nr_buckets == 0 for order based mm means infinite */ + if (mm == &cds_lfht_mm_order && !max_nr_buckets) + max_nr_buckets = 1UL << (MAX_TABLE_ORDER - 1); + + /* max_nr_buckets must be power of two */ + if (!max_nr_buckets || (max_nr_buckets & (max_nr_buckets - 1))) + return NULL; + + min_nr_alloc_buckets = max(min_nr_alloc_buckets, MIN_TABLE_SIZE); + init_size = max(init_size, MIN_TABLE_SIZE); + max_nr_buckets = max(max_nr_buckets, min_nr_alloc_buckets); + init_size = min(init_size, max_nr_buckets); + + ht = mm->alloc_cds_lfht(min_nr_alloc_buckets, max_nr_buckets); + assert(ht); + assert(ht->mm == mm); + assert(ht->bucket_at == mm->bucket_at); + + ht->flags = flags; + ht->flavor = flavor; + ht->resize_attr = attr; + alloc_split_items_count(ht); + /* this mutex should not nest in read-side C.S. 
*/ + pthread_mutex_init(&ht->resize_mutex, NULL); + order = cds_lfht_get_count_order_ulong(init_size); + ht->resize_target = 1UL << order; + cds_lfht_create_bucket(ht, 1UL << order); + ht->size = 1UL << order; + return ht; +} + +void cds_lfht_lookup(struct cds_lfht *ht, unsigned long hash, + cds_lfht_match_fct match, const void *key, + struct cds_lfht_iter *iter) +{ + struct cds_lfht_node *node, *next, *bucket; + unsigned long reverse_hash, size; + + reverse_hash = bit_reverse_ulong(hash); + + size = rcu_dereference(ht->size); + bucket = lookup_bucket(ht, size, hash); + /* We can always skip the bucket node initially */ + node = rcu_dereference(bucket->next); + node = clear_flag(node); + for (;;) { + if (caa_unlikely(is_end(node))) { + node = next = NULL; + break; + } + if (caa_unlikely(node->reverse_hash > reverse_hash)) { + node = next = NULL; + break; + } + next = rcu_dereference(node->next); + assert(node == clear_flag(node)); + if (caa_likely(!is_removed(next)) + && !is_bucket(next) + && node->reverse_hash == reverse_hash + && caa_likely(match(node, key))) { + break; + } + node = clear_flag(next); + } + assert(!node || !is_bucket(rcu_dereference(node->next))); + iter->node = node; + iter->next = next; +} + +void cds_lfht_next_duplicate(struct cds_lfht *ht, cds_lfht_match_fct match, + const void *key, struct cds_lfht_iter *iter) +{ + struct cds_lfht_node *node, *next; + unsigned long reverse_hash; + + node = iter->node; + reverse_hash = node->reverse_hash; + next = iter->next; + node = clear_flag(next); + + for (;;) { + if (caa_unlikely(is_end(node))) { + node = next = NULL; + break; + } + if (caa_unlikely(node->reverse_hash > reverse_hash)) { + node = next = NULL; + break; + } + next = rcu_dereference(node->next); + if (caa_likely(!is_removed(next)) + && !is_bucket(next) + && caa_likely(match(node, key))) { + break; + } + node = clear_flag(next); + } + assert(!node || !is_bucket(rcu_dereference(node->next))); + iter->node = node; + iter->next = next; +} + +void cds_lfht_next(struct cds_lfht *ht, struct cds_lfht_iter *iter) +{ + struct cds_lfht_node *node, *next; + + node = clear_flag(iter->next); + for (;;) { + if (caa_unlikely(is_end(node))) { + node = next = NULL; + break; + } + next = rcu_dereference(node->next); + if (caa_likely(!is_removed(next)) + && !is_bucket(next)) { + break; + } + node = clear_flag(next); + } + assert(!node || !is_bucket(rcu_dereference(node->next))); + iter->node = node; + iter->next = next; +} + +void cds_lfht_first(struct cds_lfht *ht, struct cds_lfht_iter *iter) +{ + /* + * Get next after first bucket node. The first bucket node is the + * first node of the linked list. 
+ */ + iter->next = bucket_at(ht, 0)->next; + cds_lfht_next(ht, iter); +} + +void cds_lfht_add(struct cds_lfht *ht, unsigned long hash, + struct cds_lfht_node *node) +{ + unsigned long size; + + node->reverse_hash = bit_reverse_ulong(hash); + size = rcu_dereference(ht->size); + _cds_lfht_add(ht, hash, NULL, NULL, size, node, NULL, 0); + ht_count_add(ht, size, hash); +} + +struct cds_lfht_node *cds_lfht_add_unique(struct cds_lfht *ht, + unsigned long hash, + cds_lfht_match_fct match, + const void *key, + struct cds_lfht_node *node) +{ + unsigned long size; + struct cds_lfht_iter iter; + + node->reverse_hash = bit_reverse_ulong(hash); + size = rcu_dereference(ht->size); + _cds_lfht_add(ht, hash, match, key, size, node, &iter, 0); + if (iter.node == node) + ht_count_add(ht, size, hash); + return iter.node; +} + +struct cds_lfht_node *cds_lfht_add_replace(struct cds_lfht *ht, + unsigned long hash, + cds_lfht_match_fct match, + const void *key, + struct cds_lfht_node *node) +{ + unsigned long size; + struct cds_lfht_iter iter; + + node->reverse_hash = bit_reverse_ulong(hash); + size = rcu_dereference(ht->size); + for (;;) { + _cds_lfht_add(ht, hash, match, key, size, node, &iter, 0); + if (iter.node == node) { + ht_count_add(ht, size, hash); + return NULL; + } + + if (!_cds_lfht_replace(ht, size, iter.node, iter.next, node)) + return iter.node; + } +} + +int cds_lfht_replace(struct cds_lfht *ht, + struct cds_lfht_iter *old_iter, + unsigned long hash, + cds_lfht_match_fct match, + const void *key, + struct cds_lfht_node *new_node) +{ + unsigned long size; + + new_node->reverse_hash = bit_reverse_ulong(hash); + if (!old_iter->node) + return -ENOENT; + if (caa_unlikely(old_iter->node->reverse_hash != new_node->reverse_hash)) + return -EINVAL; + if (caa_unlikely(!match(old_iter->node, key))) + return -EINVAL; + size = rcu_dereference(ht->size); + return _cds_lfht_replace(ht, size, old_iter->node, old_iter->next, + new_node); +} + +int cds_lfht_del(struct cds_lfht *ht, struct cds_lfht_node *node) +{ + unsigned long size, hash; + int ret; + + size = rcu_dereference(ht->size); + ret = _cds_lfht_del(ht, size, node); + if (!ret) { + hash = bit_reverse_ulong(node->reverse_hash); + ht_count_del(ht, size, hash); + } + return ret; +} + +static +int cds_lfht_delete_bucket(struct cds_lfht *ht) +{ + struct cds_lfht_node *node; + unsigned long order, i, size; + + /* Check that the table is empty */ + node = bucket_at(ht, 0); + do { + node = clear_flag(node)->next; + if (!is_bucket(node)) + return -EPERM; + assert(!is_removed(node)); + } while (!is_end(node)); + /* + * size accessed without rcu_dereference because hash table is + * being destroyed. + */ + size = ht->size; + /* Internal sanity check: all nodes left should be bucket */ + for (i = 0; i < size; i++) { + node = bucket_at(ht, i); + dbg_printf("delete bucket: index %lu expected hash %lu hash %lu\n", + i, i, bit_reverse_ulong(node->reverse_hash)); + assert(is_bucket(node->next)); + } + + for (order = cds_lfht_get_count_order_ulong(size); (long)order >= 0; order--) + cds_lfht_free_bucket_table(ht, order); + + return 0; +} + +/* + * Should only be called when no more concurrent readers nor writers can + * possibly access the table. 
+ */ +int cds_lfht_destroy(struct cds_lfht *ht, pthread_attr_t **attr) +{ + int ret; + + /* Wait for in-flight resize operations to complete */ + _CMM_STORE_SHARED(ht->in_progress_destroy, 1); + cmm_smp_mb(); /* Store destroy before load resize */ + while (uatomic_read(&ht->in_progress_resize)) + poll(NULL, 0, 100); /* wait for 100ms */ + ret = cds_lfht_delete_bucket(ht); + if (ret) + return ret; + free_split_items_count(ht); + if (attr) + *attr = ht->resize_attr; + poison_free(ht); + return ret; +} + +void cds_lfht_count_nodes(struct cds_lfht *ht, + long *approx_before, + unsigned long *count, + long *approx_after) +{ + struct cds_lfht_node *node, *next; + unsigned long nr_bucket = 0, nr_removed = 0; + + *approx_before = 0; + if (ht->split_count) { + int i; + + for (i = 0; i < split_count_mask + 1; i++) { + *approx_before += uatomic_read(&ht->split_count[i].add); + *approx_before -= uatomic_read(&ht->split_count[i].del); + } + } + + *count = 0; + + /* Count non-bucket nodes in the table */ + node = bucket_at(ht, 0); + do { + next = rcu_dereference(node->next); + if (is_removed(next)) { + if (!is_bucket(next)) + (nr_removed)++; + else + (nr_bucket)++; + } else if (!is_bucket(next)) + (*count)++; + else + (nr_bucket)++; + node = clear_flag(next); + } while (!is_end(node)); + dbg_printf("number of logically removed nodes: %lu\n", nr_removed); + dbg_printf("number of bucket nodes: %lu\n", nr_bucket); + *approx_after = 0; + if (ht->split_count) { + int i; + + for (i = 0; i < split_count_mask + 1; i++) { + *approx_after += uatomic_read(&ht->split_count[i].add); + *approx_after -= uatomic_read(&ht->split_count[i].del); + } + } +} + +/* called with resize mutex held */ +static +void _do_cds_lfht_grow(struct cds_lfht *ht, + unsigned long old_size, unsigned long new_size) +{ + unsigned long old_order, new_order; + + old_order = cds_lfht_get_count_order_ulong(old_size); + new_order = cds_lfht_get_count_order_ulong(new_size); + dbg_printf("resize from %lu (order %lu) to %lu (order %lu) buckets\n", + old_size, old_order, new_size, new_order); + assert(new_size > old_size); + init_table(ht, old_order + 1, new_order); +} + +/* called with resize mutex held */ +static +void _do_cds_lfht_shrink(struct cds_lfht *ht, + unsigned long old_size, unsigned long new_size) +{ + unsigned long old_order, new_order; + + new_size = max(new_size, MIN_TABLE_SIZE); + old_order = cds_lfht_get_count_order_ulong(old_size); + new_order = cds_lfht_get_count_order_ulong(new_size); + dbg_printf("resize from %lu (order %lu) to %lu (order %lu) buckets\n", + old_size, old_order, new_size, new_order); + assert(new_size < old_size); + + /* Remove and unlink all bucket nodes to remove. */ + fini_table(ht, new_order + 1, old_order); +} + + +/* called with resize mutex held */ +static +void _do_cds_lfht_resize(struct cds_lfht *ht) +{ + unsigned long new_size, old_size; + + /* + * Resize table, re-do if the target size has changed under us. 
+ */ + do { + assert(uatomic_read(&ht->in_progress_resize)); + if (CMM_LOAD_SHARED(ht->in_progress_destroy)) + break; + ht->resize_initiated = 1; + old_size = ht->size; + new_size = CMM_LOAD_SHARED(ht->resize_target); + if (old_size < new_size) + _do_cds_lfht_grow(ht, old_size, new_size); + else if (old_size > new_size) + _do_cds_lfht_shrink(ht, old_size, new_size); + ht->resize_initiated = 0; + /* write resize_initiated before read resize_target */ + cmm_smp_mb(); + } while (ht->size != CMM_LOAD_SHARED(ht->resize_target)); +} + +static +unsigned long resize_target_grow(struct cds_lfht *ht, unsigned long new_size) +{ + return _uatomic_xchg_monotonic_increase(&ht->resize_target, new_size); +} + +static +void resize_target_update_count(struct cds_lfht *ht, + unsigned long count) +{ + count = max(count, MIN_TABLE_SIZE); + count = min(count, ht->max_nr_buckets); + uatomic_set(&ht->resize_target, count); +} + +void cds_lfht_resize(struct cds_lfht *ht, unsigned long new_size) +{ + resize_target_update_count(ht, new_size); + CMM_STORE_SHARED(ht->resize_initiated, 1); + ht->flavor->thread_offline(); + pthread_mutex_lock(&ht->resize_mutex); + _do_cds_lfht_resize(ht); + pthread_mutex_unlock(&ht->resize_mutex); + ht->flavor->thread_online(); +} + +static +void do_resize_cb(struct rcu_head *head) +{ + struct rcu_resize_work *work = + caa_container_of(head, struct rcu_resize_work, head); + struct cds_lfht *ht = work->ht; + + ht->flavor->thread_offline(); + pthread_mutex_lock(&ht->resize_mutex); + _do_cds_lfht_resize(ht); + pthread_mutex_unlock(&ht->resize_mutex); + ht->flavor->thread_online(); + poison_free(work); + cmm_smp_mb(); /* finish resize before decrement */ + uatomic_dec(&ht->in_progress_resize); +} + +static +void __cds_lfht_resize_lazy_launch(struct cds_lfht *ht) +{ + struct rcu_resize_work *work; + + /* Store resize_target before read resize_initiated */ + cmm_smp_mb(); + if (!CMM_LOAD_SHARED(ht->resize_initiated)) { + uatomic_inc(&ht->in_progress_resize); + cmm_smp_mb(); /* increment resize count before load destroy */ + if (CMM_LOAD_SHARED(ht->in_progress_destroy)) { + uatomic_dec(&ht->in_progress_resize); + return; + } + work = malloc(sizeof(*work)); + work->ht = ht; + ht->flavor->update_call_rcu(&work->head, do_resize_cb); + CMM_STORE_SHARED(ht->resize_initiated, 1); + } +} + +static +void cds_lfht_resize_lazy_grow(struct cds_lfht *ht, unsigned long size, int growth) +{ + unsigned long target_size = size << growth; + + target_size = min(target_size, ht->max_nr_buckets); + if (resize_target_grow(ht, target_size) >= target_size) + return; + + __cds_lfht_resize_lazy_launch(ht); +} + +/* + * We favor grow operations over shrink. A shrink operation never occurs + * if a grow operation is queued for lazy execution. A grow operation + * cancels any pending shrink lazy execution. 
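
In other words, a grow request only ever raises resize_target, while a shrink request gives up as soon as it sees a target above the current size. A minimal single-threaded sketch of that decision logic (the real code does this with uatomic_cmpxchg() on ht->resize_target):

#include <assert.h>

static unsigned long resize_target;

/* Raise the target, never lower it. */
static void request_grow(unsigned long new_size)
{
	if (new_size > resize_target)
		resize_target = new_size;
}

/* Lower the target only if no larger grow is pending. */
static void request_shrink(unsigned long cur_size, unsigned long new_size)
{
	if (resize_target > cur_size)
		return;		/* a grow is pending: drop the shrink */
	resize_target = new_size;
}

int main(void)
{
	resize_target = 64;		/* current table size */
	request_grow(256);
	request_shrink(64, 16);		/* ignored, a grow is queued */
	assert(resize_target == 256);
	return 0;
}
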
+ */ +static +void cds_lfht_resize_lazy_count(struct cds_lfht *ht, unsigned long size, + unsigned long count) +{ + if (!(ht->flags & CDS_LFHT_AUTO_RESIZE)) + return; + count = max(count, MIN_TABLE_SIZE); + count = min(count, ht->max_nr_buckets); + if (count == size) + return; /* Already the right size, no resize needed */ + if (count > size) { /* lazy grow */ + if (resize_target_grow(ht, count) >= count) + return; + } else { /* lazy shrink */ + for (;;) { + unsigned long s; + + s = uatomic_cmpxchg(&ht->resize_target, size, count); + if (s == size) + break; /* no resize needed */ + if (s > size) + return; /* growing is/(was just) in progress */ + if (s <= count) + return; /* some other thread do shrink */ + size = s; + } + } + __cds_lfht_resize_lazy_launch(ht); +} diff --git a/liblttng-ht/rculfhash.h b/liblttng-ht/rculfhash.h new file mode 100644 index 000000000..136c7259b --- /dev/null +++ b/liblttng-ht/rculfhash.h @@ -0,0 +1,434 @@ +#ifndef _URCU_RCULFHASH_H +#define _URCU_RCULFHASH_H + +/* + * urcu/rculfhash.h + * + * Userspace RCU library - Lock-Free RCU Hash Table + * + * Copyright 2011 - Mathieu Desnoyers + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Include this file _after_ including your URCU flavor. + */ + +#include +#include +#include + +#include "urcu-flavor.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * cds_lfht_node: Contains the next pointers and reverse-hash + * value required for lookup and traversal of the hash table. + * + * struct cds_lfht_node should be aligned on 8-bytes boundaries because + * the three lower bits are used as flags. It is worth noting that the + * information contained within these three bits could be represented on + * two bits by re-using the same bit for REMOVAL_OWNER_FLAG and + * BUCKET_FLAG. This can be done if we ensure that no iterator nor + * updater check the BUCKET_FLAG after it detects that the REMOVED_FLAG + * is set. Given the minimum size of struct cds_lfht_node is 8 bytes on + * 32-bit architectures, we choose to go for simplicity and reserve + * three bits. + * + * struct cds_lfht_node can be embedded into a structure (as a field). + * caa_container_of() can be used to get the structure from the struct + * cds_lfht_node after a lookup. + * + * The structure which embeds it typically holds the key (or key-value + * pair) of the object. The caller code is responsible for calculation + * of the hash value for cds_lfht APIs. + */ +struct cds_lfht_node { + struct cds_lfht_node *next; /* ptr | REMOVAL_OWNER_FLAG | BUCKET_FLAG | REMOVED_FLAG */ + unsigned long reverse_hash; +} __attribute__((aligned(8))); + +/* cds_lfht_iter: Used to track state while traversing a hash chain. 
*/ +struct cds_lfht_iter { + struct cds_lfht_node *node, *next; +}; + +static inline +struct cds_lfht_node *cds_lfht_iter_get_node(struct cds_lfht_iter *iter) +{ + return iter->node; +} + +struct cds_lfht; + +/* + * Caution ! + * Ensure reader and writer threads are registered as urcu readers. + */ + +typedef int (*cds_lfht_match_fct)(struct cds_lfht_node *node, const void *key); + +/* + * cds_lfht_node_init - initialize a hash table node + * @node: the node to initialize. + * + * This function is kept to be eventually used for debugging purposes + * (detection of memory corruption). + */ +static inline +void cds_lfht_node_init(struct cds_lfht_node *node) +{ +} + +/* + * Hash table creation flags. + */ +enum { + CDS_LFHT_AUTO_RESIZE = (1U << 0), + CDS_LFHT_ACCOUNTING = (1U << 1), +}; + +struct cds_lfht_mm_type { + struct cds_lfht *(*alloc_cds_lfht)(unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets); + void (*alloc_bucket_table)(struct cds_lfht *ht, unsigned long order); + void (*free_bucket_table)(struct cds_lfht *ht, unsigned long order); + struct cds_lfht_node *(*bucket_at)(struct cds_lfht *ht, + unsigned long index); +}; + +extern const struct cds_lfht_mm_type cds_lfht_mm_order; +extern const struct cds_lfht_mm_type cds_lfht_mm_chunk; +extern const struct cds_lfht_mm_type cds_lfht_mm_mmap; + +/* + * _cds_lfht_new - API used by cds_lfht_new wrapper. Do not use directly. + */ +struct cds_lfht *_cds_lfht_new(unsigned long init_size, + unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets, + int flags, + const struct cds_lfht_mm_type *mm, + const struct rcu_flavor_struct *flavor, + pthread_attr_t *attr); + +/* + * cds_lfht_new - allocate a hash table. + * @init_size: number of buckets to allocate initially. Must be power of two. + * @min_nr_alloc_buckets: the minimum number of allocated buckets. + * (must be power of two) + * @max_nr_buckets: the maximum number of hash table buckets allowed. + * (must be power of two) + * @flags: hash table creation flags (can be combined with bitwise or: '|'). + * 0: no flags. + * CDS_LFHT_AUTO_RESIZE: automatically resize hash table. + * CDS_LFHT_ACCOUNTING: count the number of node addition + * and removal in the table + * @attr: optional resize worker thread attributes. NULL for default. + * + * Return NULL on error. + * Note: the RCU flavor must be already included before the hash table header. + * + * The programmer is responsible for ensuring that resize operation has a + * priority equal to hash table updater threads. It should be performed by + * specifying the appropriate priority in the pthread "attr" argument, and, + * for CDS_LFHT_AUTO_RESIZE, by ensuring that call_rcu worker threads also have + * this priority level. Having lower priority for call_rcu and resize threads + * does not pose any correctness issue, but the resize operations could be + * starved by updates, thus leading to long hash table bucket chains. + * Threads calling this API are NOT required to be registered RCU read-side + * threads. It can be called very early.(before rcu is initialized ...etc.) + */ +static inline +struct cds_lfht *cds_lfht_new(unsigned long init_size, + unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets, + int flags, + pthread_attr_t *attr) +{ + return _cds_lfht_new(init_size, min_nr_alloc_buckets, max_nr_buckets, + flags, NULL, &rcu_flavor, attr); +} + +/* + * cds_lfht_destroy - destroy a hash table. + * @ht: the hash table to destroy. 
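
The match callback and the embedded cds_lfht_node described above are typically used together: the caller wraps the node in its own structure and recovers it with caa_container_of() inside the callback. A sketch of such a key/value entry and its match function (my_entry and the string key are the caller's choice, not part of the API):

#include <string.h>
#include <urcu.h>		/* RCU flavor, included before the ht header */
#include <urcu/compiler.h>	/* caa_container_of() */
#include "rculfhash.h"

struct my_entry {
	char key[64];
	int value;
	struct cds_lfht_node node;	/* embedded hash table node */
};

/* cds_lfht_match_fct: return non-zero when the node matches the key. */
static int my_match(struct cds_lfht_node *ht_node, const void *key)
{
	struct my_entry *entry =
		caa_container_of(ht_node, struct my_entry, node);

	return strcmp(entry->key, key) == 0;
}
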
+ * @attr: (output) resize worker thread attributes, as received by cds_lfht_new. + * The caller will typically want to free this pointer if dynamically + * allocated. The attr point can be NULL if the caller does not + * need to be informed of the value passed to cds_lfht_new(). + * + * Return 0 on success, negative error value on error. + * Threads calling this API need to be registered RCU read-side threads. + */ +int cds_lfht_destroy(struct cds_lfht *ht, pthread_attr_t **attr); + +/* + * cds_lfht_count_nodes - count the number of nodes in the hash table. + * @ht: the hash table. + * @split_count_before: Sample the node count split-counter before traversal. + * @count: Traverse the hash table, count the number of nodes observed. + * @split_count_after: Sample the node count split-counter after traversal. + * + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + */ +void cds_lfht_count_nodes(struct cds_lfht *ht, + long *split_count_before, + unsigned long *count, + long *split_count_after); + +/* + * cds_lfht_lookup - lookup a node by key. + * @ht: the hash table. + * @hash: the key hash. + * @match: the key match function. + * @key: the current node key. + * @iter: Node, if found (output). *iter->node set to NULL if not found. + * + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + */ +void cds_lfht_lookup(struct cds_lfht *ht, unsigned long hash, + cds_lfht_match_fct match, const void *key, + struct cds_lfht_iter *iter); + +/* + * cds_lfht_next_duplicate - get the next item with same key (after a lookup). + * @ht: the hash table. + * @match: the key match function. + * @key: the current node key. + * @iter: Node, if found (output). *iter->node set to NULL if not found. + * + * Uses an iterator initialized by a lookup. + * Sets *iter-node to the following node with same key. + * Sets *iter->node to NULL if no following node exists with same key. + * RCU read-side lock must be held across cds_lfht_lookup and + * cds_lfht_next calls, and also between cds_lfht_next calls using the + * node returned by a previous cds_lfht_next. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + */ +void cds_lfht_next_duplicate(struct cds_lfht *ht, + cds_lfht_match_fct match, const void *key, + struct cds_lfht_iter *iter); + +/* + * cds_lfht_first - get the first node in the table. + * @ht: the hash table. + * @iter: First node, if exists (output). *iter->node set to NULL if not found. + * + * Output in "*iter". *iter->node set to NULL if table is empty. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + */ +void cds_lfht_first(struct cds_lfht *ht, struct cds_lfht_iter *iter); + +/* + * cds_lfht_next - get the next node in the table. + * @ht: the hash table. + * @iter: Next node, if exists (output). *iter->node set to NULL if not found. + * + * Input/Output in "*iter". *iter->node set to NULL if *iter was + * pointing to the last table node. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + */ +void cds_lfht_next(struct cds_lfht *ht, struct cds_lfht_iter *iter); + +/* + * cds_lfht_add - add a node to the hash table. + * @ht: the hash table. + * @hash: the key hash. + * @node: the node to add. + * + * This function supports adding redundant keys into the table. + * Call with rcu_read_lock held. 
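
[Editor's note] The lookup contract above (caller-computed hash, caller-supplied match callback, RCU read-side lock held across the call) translates to a pattern like this sketch. It continues the hypothetical my_entry type from above and borrows hash_key_str() from liblttng-ht/utils.c, added later in this patch:

    #include <string.h>
    #include "utils.h"      /* hash_key_str() */

    /* Match callback: compare the candidate node's key with the lookup key. */
    static int my_match(struct cds_lfht_node *node, const void *key)
    {
        struct my_entry *entry = caa_container_of(node, struct my_entry, node);

        return strcmp(entry->name, key) == 0;
    }

    /* Caller must hold rcu_read_lock() and be a registered RCU reader. */
    static struct my_entry *my_find(struct cds_lfht *ht, char *name,
            unsigned long seed)
    {
        struct cds_lfht_iter iter;
        struct cds_lfht_node *node;

        cds_lfht_lookup(ht, hash_key_str(name, seed), my_match, name, &iter);
        node = cds_lfht_iter_get_node(&iter);

        return node ? caa_container_of(node, struct my_entry, node) : NULL;
    }
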
+ * Threads calling this API need to be registered RCU read-side threads. + */ +void cds_lfht_add(struct cds_lfht *ht, unsigned long hash, + struct cds_lfht_node *node); + +/* + * cds_lfht_add_unique - add a node to hash table, if key is not present. + * @ht: the hash table. + * @hash: the node's hash. + * @match: the key match function. + * @key: the node's key. + * @node: the node to try adding. + * + * Return the node added upon success. + * Return the unique node already present upon failure. If + * cds_lfht_add_unique fails, the node passed as parameter should be + * freed by the caller. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + * + * The semantic of this function is that if only this function is used + * to add keys into the table, no duplicated keys should ever be + * observable in the table. The same guarantee apply for combination of + * add_unique and add_replace (see below). + */ +struct cds_lfht_node *cds_lfht_add_unique(struct cds_lfht *ht, + unsigned long hash, + cds_lfht_match_fct match, + const void *key, + struct cds_lfht_node *node); + +/* + * cds_lfht_add_replace - replace or add a node within hash table. + * @ht: the hash table. + * @hash: the node's hash. + * @match: the key match function. + * @key: the node's key. + * @node: the node to add. + * + * Return the node replaced upon success. If no node matching the key + * was present, return NULL, which also means the operation succeeded. + * This replacement operation should never fail. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + * After successful replacement, a grace period must be waited for before + * freeing the memory reserved for the returned node. + * + * The semantic of replacement vs lookups is the following: if lookups + * are performed between a key unique insertion and its removal, we + * guarantee that the lookups and get next will always find exactly one + * instance of the key if it is replaced concurrently with the lookups. + * + * Providing this semantic allows us to ensure that replacement-only + * schemes will never generate duplicated keys. It also allows us to + * guarantee that a combination of add_replace and add_unique updates + * will never generate duplicated keys. + */ +struct cds_lfht_node *cds_lfht_add_replace(struct cds_lfht *ht, + unsigned long hash, + cds_lfht_match_fct match, + const void *key, + struct cds_lfht_node *node); + +/* + * cds_lfht_replace - replace a node pointer to by iter within hash table. + * @ht: the hash table. + * @old_iter: the iterator position of the node to replace. + * @hash: the node's hash. + * @match: the key match function. + * @key: the node's key. + * @new_node: the new node to use as replacement. + * + * Return 0 if replacement is successful, negative value otherwise. + * Replacing a NULL old node or an already removed node will fail with + * -ENOENT. + * If the hash or value of the node to replace and the new node differ, + * this function returns -EINVAL without proceeding to the replacement. + * Old node can be looked up with cds_lfht_lookup and cds_lfht_next. + * RCU read-side lock must be held between lookup and replacement. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + * After successful replacement, a grace period must be waited for before + * freeing the memory reserved for the old node (which can be accessed + * with cds_lfht_iter_get_node). 
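
[Editor's note] cds_lfht_add_unique() either returns the node that was passed in (success) or the node already holding the same key, in which case the caller keeps ownership of its node and typically frees it. A hedged sketch of that insert-or-fail step, continuing the example above:

    #include <errno.h>

    /* Returns 0 if 'entry' was inserted, -EEXIST if another entry already
     * holds the same key (ownership of 'entry' then stays with the caller).
     * Caller must be a registered RCU reader. */
    static int my_insert_unique(struct cds_lfht *ht, struct my_entry *entry,
            unsigned long seed)
    {
        struct cds_lfht_node *ret_node;

        cds_lfht_node_init(&entry->node);
        rcu_read_lock();
        ret_node = cds_lfht_add_unique(ht, hash_key_str(entry->name, seed),
                my_match, entry->name, &entry->node);
        rcu_read_unlock();

        return ret_node == &entry->node ? 0 : -EEXIST;
    }
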
+ * + * The semantic of replacement vs lookups is the following: if lookups + * are performed between a key unique insertion and its removal, we + * guarantee that the lookups and get next will always find exactly one + * instance of the key if it is replaced concurrently with the lookups. + * + * Providing this semantic allows us to ensure that replacement-only + * schemes will never generate duplicated keys. It also allows us to + * guarantee that a combination of add_replace and add_unique updates + * will never generate duplicated keys. + */ +int cds_lfht_replace(struct cds_lfht *ht, + struct cds_lfht_iter *old_iter, + unsigned long hash, + cds_lfht_match_fct match, + const void *key, + struct cds_lfht_node *new_node); + +/* + * cds_lfht_del - remove node pointed to by iterator from hash table. + * @ht: the hash table. + * @node: the node to delete. + * + * Return 0 if the node is successfully removed, negative value + * otherwise. + * Deleting a NULL node or an already removed node will fail with a + * negative value. + * Node can be looked up with cds_lfht_lookup and cds_lfht_next, + * followed by use of cds_lfht_iter_get_node. + * RCU read-side lock must be held between lookup and removal. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + * After successful removal, a grace period must be waited for before + * freeing the memory reserved for old node (which can be accessed with + * cds_lfht_iter_get_node). + */ +int cds_lfht_del(struct cds_lfht *ht, struct cds_lfht_node *node); + +/* + * cds_lfht_resize - Force a hash table resize + * @ht: the hash table. + * @new_size: update to this hash table size. + * + * Threads calling this API need to be registered RCU read-side threads. + */ +void cds_lfht_resize(struct cds_lfht *ht, unsigned long new_size); + +/* + * Note: cds_lfht_for_each are safe for element removal during + * iteration. 
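
[Editor's note] The removal rules above (lookup and cds_lfht_del() under the same RCU read-side critical section, then a grace period before the memory is reused) are usually implemented with call_rcu(). A sketch continuing the same hypothetical entry type:

    #include <stdlib.h>
    #include <errno.h>
    #include <urcu-call-rcu.h>  /* call_rcu(); usually already pulled in by urcu.h */

    static void free_entry_rcu(struct rcu_head *head)
    {
        struct my_entry *entry = caa_container_of(head, struct my_entry, head);

        free(entry);
    }

    /* Caller must hold rcu_read_lock() and be a registered RCU reader. */
    static int my_remove(struct cds_lfht *ht, char *name, unsigned long seed)
    {
        struct cds_lfht_iter iter;
        struct cds_lfht_node *node;
        struct my_entry *entry;

        cds_lfht_lookup(ht, hash_key_str(name, seed), my_match, name, &iter);
        node = cds_lfht_iter_get_node(&iter);
        if (node == NULL || cds_lfht_del(ht, node) != 0) {
            return -ENOENT;
        }
        entry = caa_container_of(node, struct my_entry, node);
        /* Free only after a grace period, once no reader can still see it. */
        call_rcu(&entry->head, free_entry_rcu);
        return 0;
    }
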
+ */ +#define cds_lfht_for_each(ht, iter, node) \ + for (cds_lfht_first(ht, iter), \ + node = cds_lfht_iter_get_node(iter); \ + node != NULL; \ + cds_lfht_next(ht, iter), \ + node = cds_lfht_iter_get_node(iter)) + +#define cds_lfht_for_each_duplicate(ht, hash, match, key, iter, node) \ + for (cds_lfht_lookup(ht, hash, match, key, iter), \ + node = cds_lfht_iter_get_node(iter); \ + node != NULL; \ + cds_lfht_next_duplicate(ht, match, key, iter), \ + node = cds_lfht_iter_get_node(iter)) + +#define cds_lfht_for_each_entry(ht, iter, pos, member) \ + for (cds_lfht_first(ht, iter), \ + pos = caa_container_of(cds_lfht_iter_get_node(iter), \ + typeof(*(pos)), member); \ + &(pos)->member != NULL; \ + cds_lfht_next(ht, iter), \ + pos = caa_container_of(cds_lfht_iter_get_node(iter), \ + typeof(*(pos)), member)) + +#define cds_lfht_for_each_entry_duplicate(ht, hash, match, key, \ + iter, pos, member) \ + for (cds_lfht_lookup(ht, hash, match, key, iter), \ + pos = caa_container_of(cds_lfht_iter_get_node(iter), \ + typeof(*(pos)), member); \ + &(pos)->member != NULL; \ + cds_lfht_next_duplicate(ht, match, key, iter), \ + pos = caa_container_of(cds_lfht_iter_get_node(iter), \ + typeof(*(pos)), member)) + +#ifdef __cplusplus +} +#endif + +#endif /* _URCU_RCULFHASH_H */ diff --git a/liblttng-ht/urcu-flavor.h b/liblttng-ht/urcu-flavor.h new file mode 100644 index 000000000..9af4d0e63 --- /dev/null +++ b/liblttng-ht/urcu-flavor.h @@ -0,0 +1,65 @@ +#ifndef _URCU_FLAVOR_H +#define _URCU_FLAVOR_H + +/* + * urcu-flavor.h + * + * Userspace RCU header - rcu flavor declarations + * + * Copyright (c) 2011 Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
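
[Editor's note] A short usage sketch for the cds_lfht_for_each_entry() macro defined above, again using the hypothetical my_entry type; as noted, removing elements during such a traversal is safe:

    /* Caller must hold rcu_read_lock() and be a registered RCU reader. */
    static unsigned long my_count_entries(struct cds_lfht *ht)
    {
        struct cds_lfht_iter iter;
        struct my_entry *entry;
        unsigned long count = 0;

        cds_lfht_for_each_entry(ht, &iter, entry, node) {
            count++;
        }
        return count;
    }
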
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef __cplusplus +extern "C" { +#endif + +struct rcu_flavor_struct { + void (*read_lock)(void); + void (*read_unlock)(void); + void (*read_quiescent_state)(void); + void (*update_call_rcu)(struct rcu_head *head, + void (*func)(struct rcu_head *head)); + void (*update_synchronize_rcu)(void); + void (*update_defer_rcu)(void (*fct)(void *p), void *p); + + void (*thread_offline)(void); + void (*thread_online)(void); + void (*register_thread)(void); + void (*unregister_thread)(void); +}; + +#define DEFINE_RCU_FLAVOR(x) \ +const struct rcu_flavor_struct x = { \ + .read_lock = rcu_read_lock, \ + .read_unlock = rcu_read_unlock, \ + .read_quiescent_state = rcu_quiescent_state, \ + .update_call_rcu = call_rcu, \ + .update_synchronize_rcu = synchronize_rcu, \ + .update_defer_rcu = defer_rcu, \ + .thread_offline = rcu_thread_offline, \ + .thread_online = rcu_thread_online, \ + .register_thread = rcu_register_thread, \ + .unregister_thread = rcu_unregister_thread,\ +} + +extern const struct rcu_flavor_struct rcu_flavor; + +#ifdef __cplusplus +} +#endif + +#endif /* _URCU_FLAVOR_H */ diff --git a/liblttng-ht/utils.c b/liblttng-ht/utils.c new file mode 100644 index 000000000..0b3d53160 --- /dev/null +++ b/liblttng-ht/utils.c @@ -0,0 +1,495 @@ +/* + * Copyright (C) - Bob Jenkins, May 2006, Public Domain. + * Copyright (C) 2011 - David Goulet + * Copyright (C) 2011 - Mathieu Desnoyers + * + * These are functions for producing 32-bit hashes for hash table lookup. + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() are + * externally useful functions. Routines to test the hash are included if + * SELF_TEST is defined. You can use this free for any purpose. It's in the + * public domain. It has no warranty. + * + * You probably want to use hashlittle(). hashlittle() and hashbig() hash byte + * arrays. hashlittle() is is faster than hashbig() on little-endian machines. + * Intel and AMD are little-endian machines. On second thought, you probably + * want hashlittle2(), which is identical to hashlittle() except it returns two + * 32-bit hashes for the price of one. You could implement hashbig2() if you + * wanted but I haven't bothered here. + * + * If you want to find a hash of, say, exactly 7 integers, do + * a = i1; b = i2; c = i3; + * mix(a,b,c); + * a += i4; b += i5; c += i6; + * mix(a,b,c); + * a += i7; + * final(a,b,c); + * then use c as the hash value. If you have a variable length array of + * 4-byte integers to hash, use hashword(). If you have a byte array (like + * a character string), use hashlittle(). If you have several byte arrays, or + * a mix of things, see the comments above hashlittle(). + * + * Why is this so big? I read 12 bytes at a time into 3 4-byte integers, then + * mix those integers. This is fast (you can do a lot more thorough mixing + * with 12*3 instructions on 3 integers than you can with 3 instructions on 1 + * byte), but shoehorning those bytes into integers efficiently is messy. + */ + +#include +#include /* attempt to define endianness */ +#include /* defines uint32_t etc */ +#include /* defines printf for tests */ +#include +#include /* attempt to define endianness */ +#include /* defines time_t for timings in the test */ +#include + +#include "utils.h" + +/* + * My best guess at if you are big-endian or little-endian. 
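
[Editor's note] The rcu_flavor_struct above decouples the hash table from any one URCU flavor: the cds_lfht_new() wrapper earlier in this patch passes &rcu_flavor down to _cds_lfht_new(), and the table code only ever calls RCU through these function pointers. An illustrative fragment of that indirection (not code from this patch; the function is a made-up example):

    #include <urcu.h>
    #include "urcu-flavor.h"

    /* Library-style code that receives a flavor pointer never has to name a
     * specific urcu symbol directly. */
    static void do_read_side_work(const struct rcu_flavor_struct *flavor,
            void (*work)(void))
    {
        flavor->read_lock();
        work();
        flavor->read_unlock();
    }
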
This may + * need adjustment. + */ +#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \ + __BYTE_ORDER == __LITTLE_ENDIAN) || \ + (defined(i386) || defined(__i386__) || defined(__i486__) || \ + defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL)) +# define HASH_LITTLE_ENDIAN 1 +# define HASH_BIG_ENDIAN 0 +#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \ + __BYTE_ORDER == __BIG_ENDIAN) || \ + (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel)) +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 1 +#else +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 0 +#endif + +#define hashsize(n) ((uint32_t)1<<(n)) +#define hashmask(n) (hashsize(n)-1) +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* + * mix -- mix 3 32-bit values reversibly. + * + * This is reversible, so any information in (a,b,c) before mix() is + * still in (a,b,c) after mix(). + * + * If four pairs of (a,b,c) inputs are run through mix(), or through + * mix() in reverse, there are at least 32 bits of the output that + * are sometimes the same for one pair and different for another pair. + * This was tested for: + * * pairs that differed by one bit, by two bits, in any combination + * of top bits of (a,b,c), or in any combination of bottom bits of + * (a,b,c). + * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + * is commonly produced by subtraction) look like a single 1-bit + * difference. + * * the base values were pseudorandom, all zero but one bit set, or + * all zero plus a counter that starts at zero. + * + * Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that + * satisfy this are + * 4 6 8 16 19 4 + * 9 15 3 18 27 15 + * 14 9 3 7 17 3 + * Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing + * for "differ" defined as + with a one-bit base and a two-bit delta. I + * used http://burtleburtle.net/bob/hash/avalanche.html to choose + * the operations, constants, and arrangements of the variables. + * + * This does not achieve avalanche. There are input bits of (a,b,c) + * that fail to affect some output bits of (a,b,c), especially of a. The + * most thoroughly mixed value is c, but it doesn't really even achieve + * avalanche in c. + * + * This allows some parallelism. Read-after-writes are good at doubling + * the number of bits affected, so the goal of mixing pulls in the opposite + * direction as the goal of parallelism. I did what I could. Rotates + * seem to cost as much as shifts on every machine I could lay my hands + * on, and rotates are much kinder to the top and bottom bits, so I used + * rotates. + */ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/* + * final -- final mixing of 3 32-bit values (a,b,c) into c + * + * Pairs of (a,b,c) values differing in only a few bits will usually + * produce values of c that look totally different. This was tested for + * * pairs that differed by one bit, by two bits, in any combination + * of top bits of (a,b,c), or in any combination of bottom bits of + * (a,b,c). + * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + * is commonly produced by subtraction) look like a single 1-bit + * difference. 
+ * * the base values were pseudorandom, all zero but one bit set, or + * all zero plus a counter that starts at zero. + * + * These constants passed: + * 14 11 25 16 4 14 24 + * 12 14 25 16 4 14 24 + * and these came close: + * 4 8 15 26 3 22 24 + * 10 8 15 26 3 22 24 + * 11 8 15 26 3 22 24 + */ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +/* + * k - the key, an array of uint32_t values + * length - the length of the key, in uint32_ts + * initval - the previous hash, or an arbitrary value + */ +static uint32_t __attribute__((unused)) hashword(const uint32_t *k, + size_t length, uint32_t initval) +{ + uint32_t a, b, c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + (((uint32_t) length) << 2) + initval; + + /*----------------------------------------- handle most of the key */ + while (length > 3) { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a, b, c); + length -= 3; + k += 3; + } + + /*----------------------------------- handle the last 3 uint32_t's */ + switch (length) { /* all the case statements fall through */ + case 3: c += k[2]; + case 2: b += k[1]; + case 1: a += k[0]; + final(a, b, c); + case 0: /* case 0: nothing left to add */ + break; + } + /*---------------------------------------------- report the result */ + return c; +} + + +/* + * hashword2() -- same as hashword(), but take two seeds and return two 32-bit + * values. pc and pb must both be nonnull, and *pc and *pb must both be + * initialized with seeds. If you pass in (*pb)==0, the output (*pc) will be + * the same as the return value from hashword(). + */ +static void __attribute__((unused)) hashword2(const uint32_t *k, size_t length, + uint32_t *pc, uint32_t *pb) +{ + uint32_t a, b, c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t) (length << 2)) + *pc; + c += *pb; + + while (length > 3) { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a, b, c); + length -= 3; + k += 3; + } + + switch (length) { + case 3 : + c += k[2]; + case 2 : + b += k[1]; + case 1 : + a += k[0]; + final(a, b, c); + case 0: /* case 0: nothing left to add */ + break; + } + + *pc = c; + *pb = b; +} + +/* + * hashlittle() -- hash a variable-length key into a 32-bit value + * k : the key (the unaligned variable-length array of bytes) + * length : the length of the key, counting by bytes + * initval : can be any 4-byte value + * Returns a 32-bit value. Every bit of the key affects every bit of + * the return value. Two keys differing by one or two bits will have + * totally different hash values. + * + * The best hash table sizes are powers of 2. There is no need to do + * mod a prime (mod is sooo slow!). If you need less than 32 bits, + * use a bitmask. For example, if you need only 10 bits, do + * h = (h & hashmask(10)); + * In which case, the hash table should have hashsize(10) elements. + * + * If you are hashing n strings (uint8_t **)k, do it like this: + * for (i=0, h=0; i 12) { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. 
But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). + */ +#ifndef VALGRIND + + switch (length) { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : return c; /* zero length strings require no mixing */ + } +#else /* make valgrind happy */ + const uint8_t *k8; + + k8 = (const uint8_t *)k; + switch (length) { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : return c; + } +#endif /* !valgrind */ + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a,b,c); + length -= 12; + k += 6; + } + + k8 = (const uint8_t *)k; + switch (length) { + case 12: + c+=k[4]+(((uint32_t)k[5])<<16); + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 11: + c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: + c+=k[4]; + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 9: + c+=k8[8]; /* fall through */ + case 8: + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 7: + b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6: + b+=k[2]; + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 5: + b+=k8[4]; /* fall through */ + case 4: + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 3: + a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2: + a+=k[0]; + break; + case 1: + a+=k8[0]; + break; + case 0: + return c; /* zero length requires no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + while (length > 12) { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a,b,c); + length -= 12; + k += 12; + } + + switch(length) { /* all the case statements fall through */ + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9: c+=k[8]; + case 8: b+=((uint32_t)k[7])<<24; + case 7: b+=((uint32_t)k[6])<<16; + case 6: b+=((uint32_t)k[5])<<8; + case 5: b+=k[4]; + case 4: a+=((uint32_t)k[3])<<24; + case 3: 
a+=((uint32_t)k[2])<<16; + case 2: a+=((uint32_t)k[1])<<8; + case 1: + a+=k[0]; + break; + case 0: + return c; + } + } + + final(a,b,c); + return c; +} + +#if (CAA_BITS_PER_LONG == 64) +/* + * Hash function for number value. + */ +unsigned long hash_key_ulong(void *_key, unsigned long seed) +{ + union { + uint64_t v64; + uint32_t v32[2]; + } v; + union { + uint64_t v64; + uint32_t v32[2]; + } key; + + v.v64 = (uint64_t) seed; + key.v64 = (uint64_t) _key; + hashword2(key.v32, 2, &v.v32[0], &v.v32[1]); + return v.v64; +} +#else +/* + * Hash function for number value. + */ +unsigned long hash_key_ulong(void *_key, unsigned long seed) +{ + uint32_t key = (uint32_t) _key; + + return hashword(&key, 1, seed); +} +#endif /* CAA_BITS_PER_LONG */ + +/* + * Hash function for string. + */ +unsigned long hash_key_str(void *key, unsigned long seed) +{ + return hashlittle(key, strlen((char *) key), seed); +} + +/* + * Hash function compare for number value. + */ +int hash_match_key_ulong(void *key1, void *key2) +{ + if (key1 == key2) { + return 1; + } + + return 0; +} + +/* + * Hash compare function for string. + */ +int hash_match_key_str(void *key1, void *key2) +{ + if (strcmp(key1, key2) == 0) { + return 1; + } + + return 0; +} diff --git a/liblttng-ht/utils.h b/liblttng-ht/utils.h new file mode 100644 index 000000000..86b340f72 --- /dev/null +++ b/liblttng-ht/utils.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2011 - David Goulet + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; only version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef _LTT_HT_UTILS_H +#define _LTT_HT_UTILS_H + +#include + +unsigned long hash_key_ulong(void *_key, unsigned long seed); +unsigned long hash_key_str(void *key, unsigned long seed); +int hash_match_key_ulong(void *key1, void *key2); +int hash_match_key_str(void *key1, void *key2); + +#endif /* _LTT_HT_UTILS_H */ diff --git a/lttng-sessiond/Makefile.am b/lttng-sessiond/Makefile.am index b853e260b..8fcea947e 100644 --- a/lttng-sessiond/Makefile.am +++ b/lttng-sessiond/Makefile.am @@ -37,7 +37,8 @@ lttng_sessiond_LDADD = -lrt -lurcu-common -lurcu \ $(top_builddir)/liblttng-sessiond-comm/liblttng-sessiond-comm.la \ $(top_builddir)/libkernelctl/libkernelctl.la \ $(top_builddir)/liblttngctl/liblttngctl.la \ - $(top_builddir)/common/libcommon.la + $(top_builddir)/common/libcommon.la \ + $(top_builddir)/liblttng-ht/liblttng-ht.la if HAVE_LIBLTTNG_UST_CTL lttng_sessiond_LDADD += -llttng-ust-ctl diff --git a/lttng-sessiond/channel.c b/lttng-sessiond/channel.c index bf6a6e5aa..7ff974da0 100644 --- a/lttng-sessiond/channel.c +++ b/lttng-sessiond/channel.c @@ -20,11 +20,11 @@ #include #include +#include #include #include #include "channel.h" -#include "../common/hashtable.h" #include "kernel.h" #include "ust-ctl.h" #include "utils.h" @@ -216,7 +216,7 @@ int channel_ust_create(struct ltt_ust_session *usess, int domain, struct lttng_channel *attr) { int ret = LTTCOMM_OK; - struct cds_lfht *chan_ht; + struct lttng_ht *chan_ht; struct ltt_ust_channel *uchan = NULL; struct lttng_channel *defattr = NULL; @@ -259,7 +259,7 @@ int channel_ust_create(struct ltt_ust_session *usess, int domain, } uchan->enabled = 1; - hashtable_add_unique(chan_ht, &uchan->node); + lttng_ht_add_unique_str(chan_ht, &uchan->node); DBG2("Channel %s created successfully", uchan->name); free(defattr); diff --git a/lttng-sessiond/context.c b/lttng-sessiond/context.c index 51beb3fba..e29cc09a5 100644 --- a/lttng-sessiond/context.c +++ b/lttng-sessiond/context.c @@ -22,11 +22,11 @@ #include #include +#include #include #include #include "context.h" -#include "../common/hashtable.h" #include "kernel.h" #include "ust-app.h" #include "trace-ust.h" @@ -180,7 +180,7 @@ static int add_uctx_to_channel(struct ltt_ust_session *usess, int domain, } /* Add ltt UST context node to ltt UST channel */ - hashtable_add_unique(uchan->ctx, &uctx->node); + lttng_ht_add_unique_ulong(uchan->ctx, &uctx->node); return LTTCOMM_OK; @@ -219,7 +219,7 @@ static int add_uctx_to_event(struct ltt_ust_session *usess, int domain, } /* Add ltt UST context node to ltt UST event */ - hashtable_add_unique(uevent->ctx, &uctx->node); + lttng_ht_add_unique_ulong(uevent->ctx, &uctx->node); return LTTCOMM_OK; @@ -280,8 +280,8 @@ int context_ust_add(struct ltt_ust_session *usess, int domain, char *channel_name) { int ret = LTTCOMM_OK, have_event = 0; - struct cds_lfht_iter iter; - struct cds_lfht *chan_ht; + struct lttng_ht_iter iter, uiter; + struct lttng_ht *chan_ht; struct ltt_ust_channel *uchan = NULL; struct ltt_ust_event *uevent = NULL; @@ -332,7 +332,7 @@ int context_ust_add(struct ltt_ust_session *usess, int domain, ret = add_uctx_to_channel(usess, domain, uchan, ctx); } else if (!uchan && have_event) { /* Add ctx to event */ /* Add context to event without having the channel name */ - cds_lfht_for_each_entry(chan_ht, &iter, uchan, node) { + cds_lfht_for_each_entry(chan_ht->ht, &iter.iter, uchan, node.node) { uevent = trace_ust_find_event_by_name(uchan->events, event_name); if (uevent != NULL) { ret = add_uctx_to_event(usess, domain, uchan, uevent, ctx); @@ 
-348,9 +348,7 @@ int context_ust_add(struct ltt_ust_session *usess, int domain, goto error; } else if (!uchan && !have_event) { /* Add ctx all events, all channels */ /* For all channels */ - cds_lfht_for_each_entry(chan_ht, &iter, uchan, node) { - struct cds_lfht_iter uiter; - + cds_lfht_for_each_entry(chan_ht->ht, &iter.iter, uchan, node.node) { ret = add_uctx_to_channel(usess, domain, uchan, ctx); if (ret < 0) { ERR("Context added to channel %s failed", uchan->name); @@ -358,7 +356,8 @@ int context_ust_add(struct ltt_ust_session *usess, int domain, } /* For all events in channel */ - cds_lfht_for_each_entry(uchan->events, &uiter, uevent, node) { + cds_lfht_for_each_entry(uchan->events->ht, &uiter.iter, uevent, + node.node) { ret = add_uctx_to_event(usess, domain, uchan, uevent, ctx); if (ret < 0) { ERR("Context add to event %s in channel %s failed", diff --git a/lttng-sessiond/event.c b/lttng-sessiond/event.c index f4c48179d..8582d2f8f 100644 --- a/lttng-sessiond/event.c +++ b/lttng-sessiond/event.c @@ -21,12 +21,12 @@ #include #include +#include #include #include #include "channel.h" #include "event.h" -#include "../common/hashtable.h" #include "kernel.h" #include "ust-ctl.h" #include "ust-app.h" @@ -260,7 +260,7 @@ int event_ust_enable_all_tracepoints(struct ltt_ust_session *usess, int domain, { int ret, i; size_t size; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ltt_ust_event *uevent = NULL; struct lttng_event *events = NULL; @@ -268,7 +268,8 @@ int event_ust_enable_all_tracepoints(struct ltt_ust_session *usess, int domain, case LTTNG_DOMAIN_UST: { /* Enable existing events */ - cds_lfht_for_each_entry(uchan->events, &iter, uevent, node) { + cds_lfht_for_each_entry(uchan->events->ht, &iter.iter, uevent, + node.node) { if (uevent->enabled == 0) { ret = ust_app_enable_event_glb(usess, uchan, uevent); if (ret < 0) { @@ -327,7 +328,7 @@ int event_ust_enable_all_tracepoints(struct ltt_ust_session *usess, int domain, uevent->enabled = 1; /* Add ltt ust event to channel */ rcu_read_lock(); - hashtable_add_unique(uchan->events, &uevent->node); + lttng_ht_add_unique_str(uchan->events, &uevent->node); rcu_read_unlock(); } @@ -410,7 +411,7 @@ int event_ust_enable_tracepoint(struct ltt_ust_session *usess, int domain, /* Add ltt ust event to channel */ if (to_create) { rcu_read_lock(); - hashtable_add_unique(uchan->events, &uevent->node); + lttng_ht_add_unique_str(uchan->events, &uevent->node); rcu_read_unlock(); } @@ -481,7 +482,7 @@ int event_ust_disable_all_tracepoints(struct ltt_ust_session *usess, int domain, { int ret, i; size_t size; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ltt_ust_event *uevent = NULL; struct lttng_event *events = NULL; @@ -489,7 +490,8 @@ int event_ust_disable_all_tracepoints(struct ltt_ust_session *usess, int domain, case LTTNG_DOMAIN_UST: { /* Disabling existing events */ - cds_lfht_for_each_entry(uchan->events, &iter, uevent, node) { + cds_lfht_for_each_entry(uchan->events->ht, &iter.iter, uevent, + node.node) { if (uevent->enabled == 1) { ret = ust_app_disable_event_glb(usess, uchan, uevent); if (ret < 0) { diff --git a/lttng-sessiond/main.c b/lttng-sessiond/main.c index ff8ec1fcd..845ec0684 100644 --- a/lttng-sessiond/main.c +++ b/lttng-sessiond/main.c @@ -39,6 +39,7 @@ #include #include +#include #include #include @@ -50,7 +51,6 @@ #include "context.h" #include "event.h" #include "futex.h" -#include "../common/hashtable.h" #include "kernel.h" #include "lttng-sessiond.h" #include "shm.h" @@ -2060,11 +2060,11 @@ static 
void list_lttng_channels(int domain, struct ltt_session *session, break; case LTTNG_DOMAIN_UST: { - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ltt_ust_channel *uchan; - cds_lfht_for_each_entry(session->ust_session->domain_global.channels, - &iter, uchan, node) { + cds_lfht_for_each_entry(session->ust_session->domain_global.channels->ht, + &iter.iter, uchan, node.node) { strncpy(channels[i].name, uchan->name, LTTNG_SYMBOL_NAME_LEN); channels[i].attr.overwrite = uchan->attr.overwrite; channels[i].attr.subbuf_size = uchan->attr.subbuf_size; @@ -2097,8 +2097,8 @@ static int list_lttng_ust_global_events(char *channel_name, { int i = 0, ret = 0; unsigned int nb_event = 0; - struct cds_lfht_iter iter; - struct cds_lfht_node *node; + struct lttng_ht_iter iter; + struct lttng_ht_node_str *node; struct ltt_ust_channel *uchan; struct ltt_ust_event *uevent; struct lttng_event *tmp; @@ -2107,16 +2107,16 @@ static int list_lttng_ust_global_events(char *channel_name, rcu_read_lock(); - node = hashtable_lookup(ust_global->channels, (void *) channel_name, - strlen(channel_name), &iter); + lttng_ht_lookup(ust_global->channels, (void *)channel_name, &iter); + node = lttng_ht_iter_get_node_str(&iter); if (node == NULL) { ret = -LTTCOMM_UST_CHAN_NOT_FOUND; goto error; } - uchan = caa_container_of(node, struct ltt_ust_channel, node); + uchan = caa_container_of(&node->node, struct ltt_ust_channel, node.node); - nb_event += hashtable_get_count(uchan->events); + nb_event += lttng_ht_get_count(uchan->events); if (nb_event == 0) { ret = nb_event; @@ -2131,7 +2131,7 @@ static int list_lttng_ust_global_events(char *channel_name, goto error; } - cds_lfht_for_each_entry(uchan->events, &iter, uevent, node) { + cds_lfht_for_each_entry(uchan->events->ht, &iter.iter, uevent, node.node) { strncpy(tmp[i].name, uevent->attr.name, LTTNG_SYMBOL_NAME_LEN); tmp[i].name[LTTNG_SYMBOL_NAME_LEN - 1] = '\0'; tmp[i].enabled = uevent->enabled; @@ -2257,7 +2257,7 @@ static int cmd_disable_channel(struct ltt_session *session, case LTTNG_DOMAIN_UST: { struct ltt_ust_channel *uchan; - struct cds_lfht *chan_ht; + struct lttng_ht *chan_ht; chan_ht = usess->domain_global.channels; @@ -2297,7 +2297,7 @@ static int cmd_enable_channel(struct ltt_session *session, { int ret; struct ltt_ust_session *usess = session->ust_session; - struct cds_lfht *chan_ht; + struct lttng_ht *chan_ht; DBG("Enabling channel %s for session %s", attr->name, session->name); @@ -2620,7 +2620,6 @@ static int cmd_enable_event(struct ltt_session *session, int domain, } /* At this point, the session and channel exist on the tracer */ - ret = event_ust_enable_tracepoint(usess, domain, uchan, event); if (ret != LTTCOMM_OK) { goto error; @@ -3136,7 +3135,7 @@ static ssize_t cmd_list_channels(int domain, struct ltt_session *session, break; case LTTNG_DOMAIN_UST: if (session->ust_session != NULL) { - nb_chan = hashtable_get_count( + nb_chan = lttng_ht_get_count( session->ust_session->domain_global.channels); } DBG3("Number of UST global channels %zd", nb_chan); diff --git a/lttng-sessiond/session.c b/lttng-sessiond/session.c index 5539aec30..cccb43b74 100644 --- a/lttng-sessiond/session.c +++ b/lttng-sessiond/session.c @@ -29,7 +29,6 @@ #include #include -#include "../common/hashtable.h" #include "session.h" /* diff --git a/lttng-sessiond/trace-ust.c b/lttng-sessiond/trace-ust.c index 00d61432e..0bfcc3bd8 100644 --- a/lttng-sessiond/trace-ust.c +++ b/lttng-sessiond/trace-ust.c @@ -22,22 +22,23 @@ #include #include +#include #include -#include 
"../common/hashtable.h" #include "trace-ust.h" /* * Find the channel in the hashtable. */ -struct ltt_ust_channel *trace_ust_find_channel_by_name(struct cds_lfht *ht, +struct ltt_ust_channel *trace_ust_find_channel_by_name(struct lttng_ht *ht, char *name) { - struct cds_lfht_node *node; - struct cds_lfht_iter iter; + struct lttng_ht_node_str *node; + struct lttng_ht_iter iter; rcu_read_lock(); - node = hashtable_lookup(ht, (void *) name, strlen(name), &iter); + lttng_ht_lookup(ht, (void *)name, &iter); + node = lttng_ht_iter_get_node_str(&iter); if (node == NULL) { rcu_read_unlock(); goto error; @@ -56,14 +57,15 @@ error: /* * Find the event in the hashtable. */ -struct ltt_ust_event *trace_ust_find_event_by_name(struct cds_lfht *ht, +struct ltt_ust_event *trace_ust_find_event_by_name(struct lttng_ht *ht, char *name) { - struct cds_lfht_node *node; - struct cds_lfht_iter iter; + struct lttng_ht_node_str *node; + struct lttng_ht_iter iter; rcu_read_lock(); - node = hashtable_lookup(ht, (void *)name, strlen(name), &iter); + lttng_ht_lookup(ht, (void *) name, &iter); + node = lttng_ht_iter_get_node_str(&iter); if (node == NULL) { rcu_read_unlock(); goto error; @@ -102,11 +104,11 @@ struct ltt_ust_session *trace_ust_create_session(char *path, int session_id, lus->start_trace = 0; /* Alloc UST domain hash tables */ - lus->domain_pid = hashtable_new(0); - lus->domain_exec = hashtable_new_str(0); + lus->domain_pid = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); + lus->domain_exec = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); /* Alloc UST global domain channels' HT */ - lus->domain_global.channels = hashtable_new_str(0); + lus->domain_global.channels = lttng_ht_new(0, LTTNG_HT_TYPE_STRING); /* Set session path */ ret = snprintf(lus->pathname, PATH_MAX, "%s/ust", path); @@ -162,10 +164,10 @@ struct ltt_ust_channel *trace_ust_create_channel(struct lttng_channel *chan, luc->name[LTTNG_UST_SYM_NAME_LEN - 1] = '\0'; /* Init node */ - hashtable_node_init(&luc->node, (void *) luc->name, strlen(luc->name)); + lttng_ht_node_init_str(&luc->node, luc->name); /* Alloc hash tables */ - luc->events = hashtable_new_str(0); - luc->ctx = hashtable_new(0); + luc->events = lttng_ht_new(0, LTTNG_HT_TYPE_STRING); + luc->ctx = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); /* Set trace output path */ ret = snprintf(luc->pathname, PATH_MAX, "%s", path); @@ -225,10 +227,9 @@ struct ltt_ust_event *trace_ust_create_event(struct lttng_event *ev) lue->attr.name[LTTNG_UST_SYM_NAME_LEN - 1] = '\0'; /* Init node */ - hashtable_node_init(&lue->node, (void *) lue->attr.name, - strlen(lue->attr.name)); + lttng_ht_node_init_str(&lue->node, lue->attr.name); /* Alloc context hash tables */ - lue->ctx = hashtable_new(0); + lue->ctx = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); DBG2("Trace UST event %s created", lue->attr.name); @@ -297,8 +298,7 @@ struct ltt_ust_context *trace_ust_create_context( } uctx->ctx.ctx = ctx->ctx; - hashtable_node_init(&uctx->node, (void *)((unsigned long) uctx->ctx.ctx), - sizeof(void *)); + lttng_ht_node_init_ulong(&uctx->node, (unsigned long) uctx->ctx.ctx); return uctx; @@ -311,8 +311,8 @@ error: */ static void destroy_context_rcu(struct rcu_head *head) { - struct cds_lfht_node *node = - caa_container_of(head, struct cds_lfht_node, head); + struct lttng_ht_node_ulong *node = + caa_container_of(head, struct lttng_ht_node_ulong, head); struct ltt_ust_context *ctx = caa_container_of(node, struct ltt_ust_context, node); @@ -322,20 +322,20 @@ static void destroy_context_rcu(struct rcu_head *head) /* * Cleanup UST context hash table. 
*/ -static void destroy_context(struct cds_lfht *ht) +static void destroy_context(struct lttng_ht *ht) { int ret; - struct cds_lfht_node *node; - struct cds_lfht_iter iter; + struct lttng_ht_node_ulong *node; + struct lttng_ht_iter iter; - cds_lfht_for_each(ht, &iter, node) { - ret = hashtable_del(ht, &iter); + cds_lfht_for_each_entry(ht->ht, &iter.iter, node, node) { + ret = lttng_ht_del(ht, &iter); if (!ret) { call_rcu(&node->head, destroy_context_rcu); } } - hashtable_destroy(ht); + lttng_ht_destroy(ht); } /* @@ -354,8 +354,8 @@ void trace_ust_destroy_event(struct ltt_ust_event *event) */ static void destroy_event_rcu(struct rcu_head *head) { - struct cds_lfht_node *node = - caa_container_of(head, struct cds_lfht_node, head); + struct lttng_ht_node_str *node = + caa_container_of(head, struct lttng_ht_node_str, head); struct ltt_ust_event *event = caa_container_of(node, struct ltt_ust_event, node); @@ -365,20 +365,20 @@ static void destroy_event_rcu(struct rcu_head *head) /* * Cleanup UST events hashtable. */ -static void destroy_event(struct cds_lfht *events) +static void destroy_event(struct lttng_ht *events) { int ret; - struct cds_lfht_node *node; - struct cds_lfht_iter iter; + struct lttng_ht_node_str *node; + struct lttng_ht_iter iter; - cds_lfht_for_each(events, &iter, node) { - ret = hashtable_del(events, &iter); + cds_lfht_for_each_entry(events->ht, &iter.iter, node, node) { + ret = lttng_ht_del(events, &iter); if (!ret) { call_rcu(&node->head, destroy_event_rcu); } } - hashtable_destroy(events); + lttng_ht_destroy(events); } /* @@ -387,15 +387,15 @@ static void destroy_event(struct cds_lfht *events) void trace_ust_destroy_channel(struct ltt_ust_channel *channel) { int ret; - struct cds_lfht_node *node; - struct cds_lfht_iter iter; + struct lttng_ht_node_str *node; + struct lttng_ht_iter iter; DBG2("Trace destroy UST channel %s", channel->name); rcu_read_lock(); - cds_lfht_for_each(channel->events, &iter, node) { - ret = hashtable_del(channel->events, &iter); + cds_lfht_for_each_entry(channel->events->ht, &iter.iter, node, node) { + ret = lttng_ht_del(channel->events, &iter); if (!ret) { destroy_event(channel->events); } @@ -412,8 +412,8 @@ void trace_ust_destroy_channel(struct ltt_ust_channel *channel) */ static void destroy_channel_rcu(struct rcu_head *head) { - struct cds_lfht_node *node = - caa_container_of(head, struct cds_lfht_node, head); + struct lttng_ht_node_str *node = + caa_container_of(head, struct lttng_ht_node_str, head); struct ltt_ust_channel *channel = caa_container_of(node, struct ltt_ust_channel, node); @@ -433,58 +433,58 @@ void trace_ust_destroy_metadata(struct ltt_ust_metadata *metadata) /* * Iterate over a hash table containing channels and cleanup safely. */ -static void destroy_channels(struct cds_lfht *channels) +static void destroy_channels(struct lttng_ht *channels) { int ret; - struct cds_lfht_node *node; - struct cds_lfht_iter iter; + struct lttng_ht_node_str *node; + struct lttng_ht_iter iter; - cds_lfht_for_each(channels, &iter, node) { - ret = hashtable_del(channels, &iter); + cds_lfht_for_each_entry(channels->ht, &iter.iter, node, node) { + ret = lttng_ht_del(channels, &iter); if (!ret) { call_rcu(&node->head, destroy_channel_rcu); } } - hashtable_destroy(channels); + lttng_ht_destroy(channels); } /* * Cleanup UST pid domain. 
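
[Editor's note] The hunks above show the conversion pattern this commit applies across the session daemon: a typed struct lttng_ht created with lttng_ht_new(), string or ulong node types initialized with lttng_ht_node_init_str()/_ulong(), and typed iterator accessors instead of raw cds_lfht calls. A condensed sketch of the string-keyed add/lookup half of that pattern; struct my_channel is a stand-in for ltt_ust_channel, and the public header name is assumed since include paths are stripped in this plain-text rendering of the diff:

    #include <urcu.h>
    #include <lttng-ht.h>   /* header name assumed; paths elided in this rendering */

    struct my_channel {
        char name[64];
        struct lttng_ht_node_str node;
    };

    /* Add a channel keyed by its name (mirrors the channel/event insertions above). */
    static void my_channel_add(struct lttng_ht *ht, struct my_channel *chan)
    {
        lttng_ht_node_init_str(&chan->node, chan->name);
        rcu_read_lock();
        lttng_ht_add_unique_str(ht, &chan->node);
        rcu_read_unlock();
    }

    /* Look a channel up by name (mirrors trace_ust_find_channel_by_name()).
     * Caller must hold rcu_read_lock(). */
    static struct my_channel *my_channel_find(struct lttng_ht *ht, char *name)
    {
        struct lttng_ht_iter iter;
        struct lttng_ht_node_str *node;

        lttng_ht_lookup(ht, (void *) name, &iter);
        node = lttng_ht_iter_get_node_str(&iter);

        return node ? caa_container_of(node, struct my_channel, node) : NULL;
    }

The table itself would be created with lttng_ht_new(0, LTTNG_HT_TYPE_STRING), as in the trace_ust_create_session()/trace_ust_create_channel() hunks above.
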
*/ -static void destroy_domain_pid(struct cds_lfht *ht) +static void destroy_domain_pid(struct lttng_ht *ht) { int ret; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ltt_ust_domain_pid *dpid; - cds_lfht_for_each_entry(ht, &iter, dpid, node) { - ret = hashtable_del(ht , &iter); + cds_lfht_for_each_entry(ht->ht, &iter.iter, dpid, node.node) { + ret = lttng_ht_del(ht , &iter); if (!ret) { destroy_channels(dpid->channels); } } - hashtable_destroy(ht); + lttng_ht_destroy(ht); } /* * Cleanup UST exec name domain. */ -static void destroy_domain_exec(struct cds_lfht *ht) +static void destroy_domain_exec(struct lttng_ht *ht) { int ret; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ltt_ust_domain_exec *dexec; - cds_lfht_for_each_entry(ht, &iter, dexec, node) { - ret = hashtable_del(ht , &iter); + cds_lfht_for_each_entry(ht->ht, &iter.iter, dexec, node.node) { + ret = lttng_ht_del(ht , &iter); if (!ret) { destroy_channels(dexec->channels); } } - hashtable_destroy(ht); + lttng_ht_destroy(ht); } /* diff --git a/lttng-sessiond/trace-ust.h b/lttng-sessiond/trace-ust.h index bb092cae5..c033ed6c5 100644 --- a/lttng-sessiond/trace-ust.h +++ b/lttng-sessiond/trace-ust.h @@ -23,12 +23,12 @@ #include #include #include + #include +#include #include "ust-ctl.h" -#include "../common/hashtable.h" - /* UST Stream list */ struct ltt_ust_stream_list { unsigned int count; @@ -38,15 +38,15 @@ struct ltt_ust_stream_list { /* Context hash table nodes */ struct ltt_ust_context { struct lttng_ust_context ctx; - struct cds_lfht_node node; + struct lttng_ht_node_ulong node; }; /* UST event */ struct ltt_ust_event { unsigned int enabled; struct lttng_ust_event attr; - struct cds_lfht *ctx; - struct cds_lfht_node node; + struct lttng_ht *ctx; + struct lttng_ht_node_str node; }; /* UST stream */ @@ -64,9 +64,9 @@ struct ltt_ust_channel { char name[LTTNG_UST_SYM_NAME_LEN]; char pathname[PATH_MAX]; struct lttng_ust_channel attr; - struct cds_lfht *ctx; - struct cds_lfht *events; - struct cds_lfht_node node; + struct lttng_ht *ctx; + struct lttng_ht *events; + struct lttng_ht_node_str node; }; /* UST Metadata */ @@ -80,21 +80,21 @@ struct ltt_ust_metadata { /* UST domain global (LTTNG_DOMAIN_UST) */ struct ltt_ust_domain_global { - struct cds_lfht *channels; + struct lttng_ht *channels; }; /* UST domain pid (LTTNG_DOMAIN_UST_PID) */ struct ltt_ust_domain_pid { pid_t pid; - struct cds_lfht *channels; - struct cds_lfht_node node; + struct lttng_ht *channels; + struct lttng_ht_node_ulong node; }; /* UST domain exec name (LTTNG_DOMAIN_UST_EXEC_NAME) */ struct ltt_ust_domain_exec { char exec_name[LTTNG_UST_SYM_NAME_LEN]; - struct cds_lfht *channels; - struct cds_lfht_node node; + struct lttng_ht *channels; + struct lttng_ht_node_str node; }; /* UST session */ @@ -108,8 +108,8 @@ struct ltt_ust_session { * contains a HT of channels. See ltt_ust_domain_exec and * ltt_ust_domain_pid data structures. */ - struct cds_lfht *domain_pid; - struct cds_lfht *domain_exec; + struct lttng_ht *domain_pid; + struct lttng_ht *domain_exec; /* UID/GID of the user owning the session */ uid_t uid; gid_t gid; @@ -120,9 +120,9 @@ struct ltt_ust_session { /* * Lookup functions. NULL is returned if not found. 
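
[Editor's note] All the destroy_*() conversions in this hunk follow one shape: walk the wrapper's underlying cds_lfht, unlink each node with lttng_ht_del(), and defer the free with call_rcu() on the node's rcu_head so in-flight readers drain first, then tear the table down. Condensed, continuing the hypothetical my_channel type from the previous sketch:

    #include <stdlib.h>

    static void free_my_channel_rcu(struct rcu_head *head)
    {
        struct lttng_ht_node_str *node =
            caa_container_of(head, struct lttng_ht_node_str, head);
        struct my_channel *chan =
            caa_container_of(node, struct my_channel, node);

        free(chan);
    }

    static void my_channels_destroy(struct lttng_ht *ht)
    {
        int ret;
        struct lttng_ht_iter iter;
        struct lttng_ht_node_str *node;

        cds_lfht_for_each_entry(ht->ht, &iter.iter, node, node) {
            ret = lttng_ht_del(ht, &iter);
            if (!ret) {
                /* Node is unlinked; free the channel after a grace period. */
                call_rcu(&node->head, free_my_channel_rcu);
            }
        }
        lttng_ht_destroy(ht);
    }
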
*/ -struct ltt_ust_event *trace_ust_find_event_by_name(struct cds_lfht *ht, +struct ltt_ust_event *trace_ust_find_event_by_name(struct lttng_ht *ht, char *name); -struct ltt_ust_channel *trace_ust_find_channel_by_name(struct cds_lfht *ht, +struct ltt_ust_channel *trace_ust_find_channel_by_name(struct lttng_ht *ht, char *name); /* @@ -149,14 +149,14 @@ void trace_ust_destroy_event(struct ltt_ust_event *event); #else /* HAVE_LIBLTTNG_UST_CTL */ static inline -struct ltt_ust_event *trace_ust_find_event_by_name(struct cds_lfht *ht, +struct ltt_ust_event *trace_ust_find_event_by_name(struct lttng_ht *ht, char *name) { return NULL; } static inline -struct ltt_ust_channel *trace_ust_find_channel_by_name(struct cds_lfht *ht, +struct ltt_ust_channel *trace_ust_find_channel_by_name(struct lttng_ht *ht, char *name) { return NULL; diff --git a/lttng-sessiond/ust-app.c b/lttng-sessiond/ust-app.c index 12e957245..d34134e94 100644 --- a/lttng-sessiond/ust-app.c +++ b/lttng-sessiond/ust-app.c @@ -28,11 +28,12 @@ #include #include + #include +#include #include #include -#include "../common/hashtable.h" #include "ust-app.h" #include "ust-consumer.h" #include "ust-ctl.h" @@ -59,16 +60,16 @@ static void delete_ust_app_event(int sock, struct ust_app_event *ua_event) { int ret; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ust_app_ctx *ua_ctx; - cds_lfht_for_each_entry(ua_event->ctx, &iter, ua_ctx, node) { - ret = hashtable_del(ua_event->ctx, &iter); + cds_lfht_for_each_entry(ua_event->ctx->ht, &iter.iter, ua_ctx, + node.node) { + ret = lttng_ht_del(ua_event->ctx, &iter); assert(!ret); delete_ust_app_ctx(sock, ua_ctx); } - ret = hashtable_destroy(ua_event->ctx); - assert(!ret); + lttng_ht_destroy(ua_event->ctx); if (ua_event->obj != NULL) { ustctl_release_object(sock, ua_event->obj); @@ -99,7 +100,7 @@ static void delete_ust_app_channel(int sock, struct ust_app_channel *ua_chan) { int ret; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ust_app_event *ua_event; struct ust_app_ctx *ua_ctx; struct ltt_ust_stream *stream, *stmp; @@ -111,22 +112,21 @@ void delete_ust_app_channel(int sock, struct ust_app_channel *ua_chan) } /* Wipe context */ - cds_lfht_for_each_entry(ua_chan->ctx, &iter, ua_ctx, node) { - ret = hashtable_del(ua_chan->ctx, &iter); + cds_lfht_for_each_entry(ua_chan->ctx->ht, &iter.iter, ua_ctx, node.node) { + ret = lttng_ht_del(ua_chan->ctx, &iter); assert(!ret); delete_ust_app_ctx(sock, ua_ctx); } - ret = hashtable_destroy(ua_chan->ctx); - assert(!ret); + lttng_ht_destroy(ua_chan->ctx); /* Wipe events */ - cds_lfht_for_each_entry(ua_chan->events, &iter, ua_event, node) { - ret = hashtable_del(ua_chan->events, &iter); + cds_lfht_for_each_entry(ua_chan->events->ht, &iter.iter, ua_event, + node.node) { + ret = lttng_ht_del(ua_chan->events, &iter); assert(!ret); delete_ust_app_event(sock, ua_event); } - ret = hashtable_destroy(ua_chan->events); - assert(!ret); + lttng_ht_destroy(ua_chan->events); if (ua_chan->obj != NULL) { ustctl_release_object(sock, ua_chan->obj); @@ -143,7 +143,7 @@ static void delete_ust_app_session(int sock, struct ust_app_session *ua_sess) { int ret; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ust_app_channel *ua_chan; if (ua_sess->metadata) { @@ -157,13 +157,13 @@ void delete_ust_app_session(int sock, struct ust_app_session *ua_sess) } } - cds_lfht_for_each_entry(ua_sess->channels, &iter, ua_chan, node) { - ret = hashtable_del(ua_sess->channels, &iter); + cds_lfht_for_each_entry(ua_sess->channels->ht, &iter.iter, ua_chan, + 
node.node) { + ret = lttng_ht_del(ua_sess->channels, &iter); assert(!ret); delete_ust_app_channel(sock, ua_chan); } - ret = hashtable_destroy(ua_sess->channels); - assert(!ret); + lttng_ht_destroy(ua_sess->channels); if (ua_sess->handle != -1) { ustctl_release_handle(sock, ua_sess->handle); @@ -179,56 +179,34 @@ static void delete_ust_app(struct ust_app *app) { int ret, sock; - struct cds_lfht_node *node; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ust_app_session *ua_sess; rcu_read_lock(); - /* Remove from key hash table */ - node = hashtable_lookup(ust_app_sock_key_map, - (void *) ((unsigned long) app->key.sock), sizeof(void *), &iter); - if (node == NULL) { - /* Not suppose to happen */ - ERR("UST app key %d not found in key hash table", app->key.sock); - goto end; - } - - ret = hashtable_del(ust_app_sock_key_map, &iter); - if (ret) { - ERR("UST app unable to delete app sock %d from key hash table", - app->key.sock); - } else { - DBG2("UST app pair sock %d key %d deleted", - app->key.sock, app->key.pid); - } - - /* Socket is already closed at this point */ - /* Delete ust app sessions info */ sock = app->key.sock; app->key.sock = -1; /* Wipe sessions */ - cds_lfht_for_each_entry(app->sessions, &iter, ua_sess, node) { - ret = hashtable_del(app->sessions, &iter); + cds_lfht_for_each_entry(app->sessions->ht, &iter.iter, ua_sess, + node.node) { + ret = lttng_ht_del(app->sessions, &iter); assert(!ret); delete_ust_app_session(app->key.sock, ua_sess); } - ret = hashtable_destroy(app->sessions); - assert(!ret); + lttng_ht_destroy(app->sessions); /* - * Wait until we have removed the key from the sock hash table - * before closing this socket, otherwise an application could - * re-use the socket ID and race with the teardown, using the - * same hash table entry. + * Wait until we have removed the key from the sock hash table before + * closing this socket, otherwise an application could re-use the socket ID + * and race with the teardown, using the same hash table entry. 
*/ close(sock); DBG2("UST app pid %d deleted", app->key.pid); free(app); -end: + rcu_read_unlock(); } @@ -238,11 +216,12 @@ end: static void delete_ust_app_rcu(struct rcu_head *head) { - struct cds_lfht_node *node = - caa_container_of(head, struct cds_lfht_node, head); + struct lttng_ht_node_ulong *node = + caa_container_of(head, struct lttng_ht_node_ulong, head); struct ust_app *app = caa_container_of(node, struct ust_app, node); + DBG3("Call RCU deleting app PID %d", app->key.pid); delete_ust_app(app); } @@ -262,7 +241,7 @@ struct ust_app_session *alloc_ust_app_session(void) } ua_sess->handle = -1; - ua_sess->channels = hashtable_new_str(0); + ua_sess->channels = lttng_ht_new(0, LTTNG_HT_TYPE_STRING); return ua_sess; @@ -292,10 +271,9 @@ struct ust_app_channel *alloc_ust_app_channel(char *name, ua_chan->enabled = 1; ua_chan->handle = -1; - ua_chan->ctx = hashtable_new(0); - ua_chan->events = hashtable_new_str(0); - hashtable_node_init(&ua_chan->node, (void *) ua_chan->name, - strlen(ua_chan->name)); + ua_chan->ctx = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); + ua_chan->events = lttng_ht_new(0, LTTNG_HT_TYPE_STRING); + lttng_ht_node_init_str(&ua_chan->node, ua_chan->name); CDS_INIT_LIST_HEAD(&ua_chan->streams.head); @@ -331,9 +309,8 @@ struct ust_app_event *alloc_ust_app_event(char *name, ua_event->enabled = 1; strncpy(ua_event->name, name, sizeof(ua_event->name)); ua_event->name[sizeof(ua_event->name) - 1] = '\0'; - ua_event->ctx = hashtable_new(0); - hashtable_node_init(&ua_event->node, (void *) ua_event->name, - strlen(ua_event->name)); + ua_event->ctx = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); + lttng_ht_node_init_str(&ua_event->node, ua_event->name); /* Copy attributes */ if (attr) { @@ -378,21 +355,21 @@ error: static struct ust_app *find_app_by_sock(int sock) { - struct cds_lfht_node *node; + struct lttng_ht_node_ulong *node; struct ust_app_key *key; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; - node = hashtable_lookup(ust_app_sock_key_map, - (void *)((unsigned long) sock), sizeof(void *), &iter); + lttng_ht_lookup(ust_app_sock_key_map, (void *)((unsigned long) sock), + &iter); + node = lttng_ht_iter_get_node_ulong(&iter); if (node == NULL) { DBG2("UST app find by sock %d key not found", sock); goto error; } - key = caa_container_of(node, struct ust_app_key, node); - node = hashtable_lookup(ust_app_ht, - (void *)((unsigned long) key->pid), sizeof(void *), &iter); + lttng_ht_lookup(ust_app_ht, (void *)((unsigned long) key->pid), &iter); + node = lttng_ht_iter_get_node_ulong(&iter); if (node == NULL) { DBG2("UST app find by sock %d not found", sock); goto error; @@ -675,7 +652,7 @@ error: static void shadow_copy_event(struct ust_app_event *ua_event, struct ltt_ust_event *uevent) { - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ltt_ust_context *uctx; struct ust_app_ctx *ua_ctx; @@ -685,14 +662,14 @@ static void shadow_copy_event(struct ust_app_event *ua_event, /* Copy event attributes */ memcpy(&ua_event->attr, &uevent->attr, sizeof(ua_event->attr)); - cds_lfht_for_each_entry(uevent->ctx, &iter, uctx, node) { + cds_lfht_for_each_entry(uevent->ctx->ht, &iter.iter, uctx, node.node) { ua_ctx = alloc_ust_app_ctx(&uctx->ctx); if (ua_ctx == NULL) { continue; } - hashtable_node_init(&ua_ctx->node, - (void *)((unsigned long) ua_ctx->ctx.ctx), sizeof(void *)); - hashtable_add_unique(ua_event->ctx, &ua_ctx->node); + lttng_ht_node_init_ulong(&ua_ctx->node, + (unsigned long) ua_ctx->ctx.ctx); + lttng_ht_add_unique_ulong(ua_event->ctx, &ua_ctx->node); } } @@ -702,8 +679,8 
@@ static void shadow_copy_event(struct ust_app_event *ua_event, static void shadow_copy_channel(struct ust_app_channel *ua_chan, struct ltt_ust_channel *uchan) { - struct cds_lfht_iter iter; - struct cds_lfht_node *ua_event_node; + struct lttng_ht_iter iter; + struct lttng_ht_node_str *ua_event_node; struct ltt_ust_event *uevent; struct ltt_ust_context *uctx; struct ust_app_event *ua_event; @@ -716,23 +693,22 @@ static void shadow_copy_channel(struct ust_app_channel *ua_chan, /* Copy event attributes */ memcpy(&ua_chan->attr, &uchan->attr, sizeof(ua_chan->attr)); - cds_lfht_for_each_entry(uchan->ctx, &iter, uctx, node) { + cds_lfht_for_each_entry(uchan->ctx->ht, &iter.iter, uctx, node.node) { ua_ctx = alloc_ust_app_ctx(&uctx->ctx); if (ua_ctx == NULL) { continue; } - hashtable_node_init(&ua_ctx->node, - (void *)((unsigned long) ua_ctx->ctx.ctx), sizeof(void *)); - hashtable_add_unique(ua_chan->ctx, &ua_ctx->node); + lttng_ht_node_init_ulong(&ua_ctx->node, + (unsigned long) ua_ctx->ctx.ctx); + lttng_ht_add_unique_ulong(ua_chan->ctx, &ua_ctx->node); } /* Copy all events from ltt ust channel to ust app channel */ - cds_lfht_for_each_entry(uchan->events, &iter, uevent, node) { - struct cds_lfht_iter uiter; + cds_lfht_for_each_entry(uchan->events->ht, &iter.iter, uevent, node.node) { + struct lttng_ht_iter uiter; - ua_event_node = hashtable_lookup(ua_chan->events, - (void *) uevent->attr.name, strlen(uevent->attr.name), - &uiter); + lttng_ht_lookup(ua_chan->events, (void *) uevent->attr.name, &uiter); + ua_event_node = lttng_ht_iter_get_node_str(&uiter); if (ua_event_node == NULL) { DBG2("UST event %s not found on shadow copy channel", uevent->attr.name); @@ -741,7 +717,7 @@ static void shadow_copy_channel(struct ust_app_channel *ua_chan, continue; } shadow_copy_event(ua_event, uevent); - hashtable_add_unique(ua_chan->events, &ua_event->node); + lttng_ht_add_unique_str(ua_chan->events, &ua_event->node); } } @@ -752,11 +728,10 @@ static void shadow_copy_channel(struct ust_app_channel *ua_chan, * Copy data between a UST app session and a regular LTT session. */ static void shadow_copy_session(struct ust_app_session *ua_sess, - struct ltt_ust_session *usess, - struct ust_app *app) + struct ltt_ust_session *usess, struct ust_app *app) { - struct cds_lfht_node *ua_chan_node; - struct cds_lfht_iter iter; + struct lttng_ht_node_str *ua_chan_node; + struct lttng_ht_iter iter; struct ltt_ust_channel *uchan; struct ust_app_channel *ua_chan; time_t rawtime; @@ -788,13 +763,12 @@ static void shadow_copy_session(struct ust_app_session *ua_sess, /* TODO: support all UST domain */ /* Iterate over all channels in global domain. 
*/ - cds_lfht_for_each_entry(usess->domain_global.channels, &iter, - uchan, node) { - struct cds_lfht_iter uiter; + cds_lfht_for_each_entry(usess->domain_global.channels->ht, &iter.iter, + uchan, node.node) { + struct lttng_ht_iter uiter; - ua_chan_node = hashtable_lookup(ua_sess->channels, - (void *)uchan->name, strlen(uchan->name), - &uiter); + lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter); + ua_chan_node = lttng_ht_iter_get_node_str(&uiter); if (ua_chan_node != NULL) { continue; } @@ -808,7 +782,7 @@ static void shadow_copy_session(struct ust_app_session *ua_sess, } shadow_copy_channel(ua_chan, uchan); - hashtable_add_unique(ua_sess->channels, &ua_chan->node); + lttng_ht_add_unique_str(ua_sess->channels, &ua_chan->node); } } @@ -817,12 +791,10 @@ static void shadow_copy_session(struct ust_app_session *ua_sess, */ static void __lookup_session_by_app(struct ltt_ust_session *usess, - struct ust_app *app, struct cds_lfht_iter *iter) + struct ust_app *app, struct lttng_ht_iter *iter) { /* Get right UST app session from app */ - (void) hashtable_lookup(app->sessions, - (void *) ((unsigned long) usess->id), sizeof(void *), - iter); + lttng_ht_lookup(app->sessions, (void *)((unsigned long) usess->uid), iter); } /* @@ -832,11 +804,11 @@ void __lookup_session_by_app(struct ltt_ust_session *usess, static struct ust_app_session *lookup_session_by_app( struct ltt_ust_session *usess, struct ust_app *app) { - struct cds_lfht_iter iter; - struct cds_lfht_node *node; + struct lttng_ht_iter iter; + struct lttng_ht_node_ulong *node; __lookup_session_by_app(usess, app, &iter); - node = hashtable_iter_get_node(&iter); + node = lttng_ht_iter_get_node_ulong(&iter); if (node == NULL) { goto error; } @@ -884,9 +856,8 @@ static struct ust_app_session *create_ust_app_session( ua_sess->handle = ret; /* Add ust app session to app's HT */ - hashtable_node_init(&ua_sess->node, - (void *)((unsigned long) ua_sess->id), sizeof(void *)); - hashtable_add_unique(app->sessions, &ua_sess->node); + lttng_ht_node_init_ulong(&ua_sess->node, (unsigned long) ua_sess->uid); + lttng_ht_add_unique_ulong(app->sessions, &ua_sess->node); DBG2("UST app session created successfully with handle %d", ret); } @@ -906,14 +877,14 @@ int create_ust_app_channel_context(struct ust_app_session *ua_sess, struct ust_app *app) { int ret = 0; - struct cds_lfht_iter iter; - struct cds_lfht_node *node; + struct lttng_ht_iter iter; + struct lttng_ht_node_ulong *node; struct ust_app_ctx *ua_ctx; DBG2("UST app adding context to channel %s", ua_chan->name); - node = hashtable_lookup(ua_chan->ctx, (void *)((unsigned long)uctx->ctx), - sizeof(void *), &iter); + lttng_ht_lookup(ua_chan->ctx, (void *)((unsigned long)uctx->ctx), &iter); + node = lttng_ht_iter_get_node_ulong(&iter); if (node != NULL) { ret = -EEXIST; goto error; @@ -926,9 +897,8 @@ int create_ust_app_channel_context(struct ust_app_session *ua_sess, goto error; } - hashtable_node_init(&ua_ctx->node, - (void *)((unsigned long) ua_ctx->ctx.ctx), sizeof(void *)); - hashtable_add_unique(ua_chan->ctx, &ua_ctx->node); + lttng_ht_node_init_ulong(&ua_ctx->node, (unsigned long) ua_ctx->ctx.ctx); + lttng_ht_add_unique_ulong(ua_chan->ctx, &ua_ctx->node); ret = create_ust_channel_context(ua_chan, ua_ctx, app); if (ret < 0) { @@ -948,14 +918,14 @@ int create_ust_app_event_context(struct ust_app_session *ua_sess, struct ust_app *app) { int ret = 0; - struct cds_lfht_iter iter; - struct cds_lfht_node *node; + struct lttng_ht_iter iter; + struct lttng_ht_node_ulong *node; struct ust_app_ctx 
*ua_ctx; DBG2("UST app adding context to event %s", ua_event->name); - node = hashtable_lookup(ua_event->ctx, (void *)((unsigned long)uctx->ctx), - sizeof(void *), &iter); + lttng_ht_lookup(ua_event->ctx, (void *)((unsigned long)uctx->ctx), &iter); + node = lttng_ht_iter_get_node_ulong(&iter); if (node != NULL) { ret = -EEXIST; goto error; @@ -968,9 +938,8 @@ int create_ust_app_event_context(struct ust_app_session *ua_sess, goto error; } - hashtable_node_init(&ua_ctx->node, - (void *)((unsigned long) ua_ctx->ctx.ctx), sizeof(void *)); - hashtable_add_unique(ua_event->ctx, &ua_ctx->node); + lttng_ht_node_init_ulong(&ua_ctx->node, (unsigned long) ua_ctx->ctx.ctx); + lttng_ht_add_unique_ulong(ua_event->ctx, &ua_ctx->node); ret = create_ust_event_context(ua_event, ua_ctx, app); if (ret < 0) { @@ -1047,12 +1016,12 @@ static int enable_ust_app_channel(struct ust_app_session *ua_sess, struct ltt_ust_channel *uchan, struct ust_app *app) { int ret = 0; - struct cds_lfht_iter iter; - struct cds_lfht_node *ua_chan_node; + struct lttng_ht_iter iter; + struct lttng_ht_node_str *ua_chan_node; struct ust_app_channel *ua_chan; - ua_chan_node = hashtable_lookup(ua_sess->channels, - (void *)uchan->name, strlen(uchan->name), &iter); + lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &iter); + ua_chan_node = lttng_ht_iter_get_node_str(&iter); if (ua_chan_node == NULL) { DBG2("Unable to find channel %s in ust session id %u", uchan->name, ua_sess->id); @@ -1078,13 +1047,13 @@ static struct ust_app_channel *create_ust_app_channel( struct ust_app *app) { int ret = 0; - struct cds_lfht_iter iter; - struct cds_lfht_node *ua_chan_node; + struct lttng_ht_iter iter; + struct lttng_ht_node_str *ua_chan_node; struct ust_app_channel *ua_chan; /* Lookup channel in the ust app session */ - ua_chan_node = hashtable_lookup(ua_sess->channels, - (void *)uchan->name, strlen(uchan->name), &iter); + lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &iter); + ua_chan_node = lttng_ht_iter_get_node_str(&iter); if (ua_chan_node == NULL) { DBG2("Unable to find channel %s in ust session id %u", uchan->name, ua_sess->id); @@ -1094,7 +1063,7 @@ static struct ust_app_channel *create_ust_app_channel( } shadow_copy_channel(ua_chan, uchan); - hashtable_add_unique(ua_sess->channels, &ua_chan->node); + lttng_ht_add_unique_str(ua_sess->channels, &ua_chan->node); } else { ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node); } @@ -1119,13 +1088,13 @@ int create_ust_app_event(struct ust_app_session *ua_sess, struct ust_app *app) { int ret = 0; - struct cds_lfht_iter iter; - struct cds_lfht_node *ua_event_node; + struct lttng_ht_iter iter; + struct lttng_ht_node_str *ua_event_node; struct ust_app_event *ua_event; /* Get event node */ - ua_event_node = hashtable_lookup(ua_chan->events, - (void *)uevent->attr.name, strlen(uevent->attr.name), &iter); + lttng_ht_lookup(ua_chan->events, (void *)uevent->attr.name, &iter); + ua_event_node = lttng_ht_iter_get_node_str(&iter); if (ua_event_node != NULL) { ERR("UST app event %s already exist. Stopping creation.", uevent->attr.name); @@ -1152,7 +1121,7 @@ int create_ust_app_event(struct ust_app_session *ua_sess, ua_event->enabled = 1; - hashtable_add_unique(ua_chan->events, &ua_event->node); + lttng_ht_add_unique_str(ua_chan->events, &ua_event->node); DBG2("UST app create event %s for PID %d completed", ua_event->name, app->key.pid); @@ -1224,7 +1193,7 @@ error: /* * Return pointer to traceable apps list. 
*/ -struct cds_lfht *ust_app_get_ht(void) +struct lttng_ht *ust_app_get_ht(void) { return ust_app_ht; } @@ -1234,12 +1203,12 @@ struct cds_lfht *ust_app_get_ht(void) */ struct ust_app *ust_app_find_by_pid(pid_t pid) { - struct cds_lfht_node *node; - struct cds_lfht_iter iter; + struct lttng_ht_node_ulong *node; + struct lttng_ht_iter iter; rcu_read_lock(); - node = hashtable_lookup(ust_app_ht, - (void *)((unsigned long) pid), sizeof(void *), &iter); + lttng_ht_lookup(ust_app_ht, (void *)((unsigned long) pid), &iter); + node = lttng_ht_iter_get_node_ulong(&iter); if (node == NULL) { DBG2("UST app no found with pid %d", pid); goto error; @@ -1288,19 +1257,17 @@ int ust_app_register(struct ust_register_msg *msg, int sock) lta->v_minor = msg->minor; strncpy(lta->name, msg->name, sizeof(lta->name)); lta->name[16] = '\0'; - lta->sessions = hashtable_new(0); + lta->sessions = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); /* Set key map */ lta->key.pid = msg->pid; - hashtable_node_init(<a->node, (void *)((unsigned long)lta->key.pid), - sizeof(void *)); + lttng_ht_node_init_ulong(<a->node, (unsigned long)lta->key.pid); lta->key.sock = sock; - hashtable_node_init(<a->key.node, (void *)((unsigned long)lta->key.sock), - sizeof(void *)); + lttng_ht_node_init_ulong(<a->key.node, (unsigned long)lta->key.sock); rcu_read_lock(); - hashtable_add_unique(ust_app_sock_key_map, <a->key.node); - hashtable_add_unique(ust_app_ht, <a->node); + lttng_ht_add_unique_ulong(ust_app_sock_key_map, <a->key.node); + lttng_ht_add_unique_ulong(ust_app_ht, <a->node); rcu_read_unlock(); DBG("App registered with pid:%d ppid:%d uid:%d gid:%d sock:%d name:%s" @@ -1319,8 +1286,8 @@ int ust_app_register(struct ust_register_msg *msg, int sock) void ust_app_unregister(int sock) { struct ust_app *lta; - struct cds_lfht_node *node; - struct cds_lfht_iter iter; + struct lttng_ht_node_ulong *node; + struct lttng_ht_iter iter; int ret; rcu_read_lock(); @@ -1333,14 +1300,14 @@ void ust_app_unregister(int sock) DBG("PID %d unregistering with sock %d", lta->key.pid, sock); /* Get the node reference for a call_rcu */ - node = hashtable_lookup(ust_app_ht, - (void *)((unsigned long) lta->key.pid), sizeof(void *), &iter); + lttng_ht_lookup(ust_app_ht, (void *)((unsigned long) lta->key.pid), &iter); + node = lttng_ht_iter_get_node_ulong(&iter); if (node == NULL) { ERR("Unable to find app sock %d by pid %d", sock, lta->key.pid); goto error; } - ret = hashtable_del(ust_app_ht, &iter); + ret = lttng_ht_del(ust_app_ht, &iter); assert(!ret); call_rcu(&node->head, delete_ust_app_rcu); error: @@ -1356,7 +1323,7 @@ unsigned long ust_app_list_count(void) unsigned long count; rcu_read_lock(); - count = hashtable_get_count(ust_app_ht); + count = lttng_ht_get_count(ust_app_ht); rcu_read_unlock(); return count; @@ -1369,7 +1336,7 @@ int ust_app_list_events(struct lttng_event **events) { int ret, handle; size_t nbmem, count = 0; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ust_app *app; struct lttng_event *tmp; @@ -1383,7 +1350,7 @@ int ust_app_list_events(struct lttng_event **events) rcu_read_lock(); - cds_lfht_for_each_entry(ust_app_ht, &iter, app, node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { struct lttng_ust_tracepoint_iter uiter; handle = ustctl_tracepoint_list(app->key.sock); @@ -1433,21 +1400,27 @@ error: void ust_app_clean_list(void) { int ret; - struct cds_lfht_iter iter; - struct ust_app *app; + struct lttng_ht_iter iter; + struct lttng_ht_node_ulong *node; DBG2("UST app cleaning registered apps hash 
table"); rcu_read_lock(); - cds_lfht_for_each_entry(ust_app_ht, &iter, app, node) { - ret = hashtable_del(ust_app_ht, &iter); + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, node, node) { + ret = lttng_ht_del(ust_app_ht, &iter); assert(!ret); - call_rcu(&iter.node->head, delete_ust_app_rcu); + call_rcu(&node->head, delete_ust_app_rcu); } + /* Destroy is done only when the ht is empty */ + lttng_ht_destroy(ust_app_ht); - hashtable_destroy(ust_app_ht); - hashtable_destroy(ust_app_sock_key_map); + cds_lfht_for_each_entry(ust_app_sock_key_map->ht, &iter.iter, node, node) { + ret = lttng_ht_del(ust_app_sock_key_map, &iter); + assert(!ret); + } + /* Destroy is done only when the ht is empty */ + lttng_ht_destroy(ust_app_sock_key_map); rcu_read_unlock(); } @@ -1457,8 +1430,8 @@ void ust_app_clean_list(void) */ void ust_app_ht_alloc(void) { - ust_app_ht = hashtable_new(0); - ust_app_sock_key_map = hashtable_new(0); + ust_app_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); + ust_app_sock_key_map = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); } /* @@ -1468,8 +1441,8 @@ int ust_app_disable_channel_glb(struct ltt_ust_session *usess, struct ltt_ust_channel *uchan) { int ret = 0; - struct cds_lfht_iter iter; - struct cds_lfht_node *ua_chan_node; + struct lttng_ht_iter iter; + struct lttng_ht_node_str *ua_chan_node; struct ust_app *app; struct ust_app_session *ua_sess; struct ust_app_channel *ua_chan; @@ -1486,17 +1459,16 @@ int ust_app_disable_channel_glb(struct ltt_ust_session *usess, rcu_read_lock(); /* For every registered applications */ - cds_lfht_for_each_entry(ust_app_ht, &iter, app, node) { - struct cds_lfht_iter uiter; - + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { + struct lttng_ht_iter uiter; ua_sess = lookup_session_by_app(usess, app); if (ua_sess == NULL) { continue; } /* Get channel */ - ua_chan_node = hashtable_lookup(ua_sess->channels, - (void *)uchan->name, strlen(uchan->name), &uiter); + lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter); + ua_chan_node = lttng_ht_iter_get_node_str(&uiter); /* If the session if found for the app, the channel must be there */ assert(ua_chan_node); @@ -1525,7 +1497,7 @@ int ust_app_enable_channel_glb(struct ltt_ust_session *usess, struct ltt_ust_channel *uchan) { int ret = 0; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ust_app *app; struct ust_app_session *ua_sess; @@ -1541,7 +1513,7 @@ int ust_app_enable_channel_glb(struct ltt_ust_session *usess, rcu_read_lock(); /* For every registered applications */ - cds_lfht_for_each_entry(ust_app_ht, &iter, app, node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { ua_sess = lookup_session_by_app(usess, app); if (ua_sess == NULL) { continue; @@ -1568,8 +1540,8 @@ int ust_app_disable_event_glb(struct ltt_ust_session *usess, struct ltt_ust_channel *uchan, struct ltt_ust_event *uevent) { int ret = 0; - struct cds_lfht_iter iter; - struct cds_lfht_node *ua_chan_node, *ua_event_node; + struct lttng_ht_iter iter, uiter; + struct lttng_ht_node_str *ua_chan_node, *ua_event_node; struct ust_app *app; struct ust_app_session *ua_sess; struct ust_app_channel *ua_chan; @@ -1581,9 +1553,7 @@ int ust_app_disable_event_glb(struct ltt_ust_session *usess, rcu_read_lock(); /* For all registered applications */ - cds_lfht_for_each_entry(ust_app_ht, &iter, app, node) { - struct cds_lfht_iter uiter; - + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { ua_sess = lookup_session_by_app(usess, app); if (ua_sess == NULL) { /* Next app */ @@ 
-1591,8 +1561,8 @@ int ust_app_disable_event_glb(struct ltt_ust_session *usess, } /* Lookup channel in the ust app session */ - ua_chan_node = hashtable_lookup(ua_sess->channels, - (void *)uchan->name, strlen(uchan->name), &uiter); + lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter); + ua_chan_node = lttng_ht_iter_get_node_str(&uiter); if (ua_chan_node == NULL) { DBG2("Channel %s not found in session id %d for app pid %d." "Skipping", uchan->name, usess->id, app->key.pid); @@ -1600,8 +1570,8 @@ int ust_app_disable_event_glb(struct ltt_ust_session *usess, } ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node); - ua_event_node = hashtable_lookup(ua_chan->events, - (void *)uevent->attr.name, strlen(uevent->attr.name), &uiter); + lttng_ht_lookup(ua_chan->events, (void *)uevent->attr.name, &uiter); + ua_event_node = lttng_ht_iter_get_node_str(&uiter); if (ua_event_node == NULL) { DBG2("Event %s not found in channel %s for app pid %d." "Skipping", uevent->attr.name, uchan->name, app->key.pid); @@ -1629,8 +1599,8 @@ int ust_app_disable_all_event_glb(struct ltt_ust_session *usess, struct ltt_ust_channel *uchan) { int ret = 0; - struct cds_lfht_iter iter; - struct cds_lfht_node *ua_chan_node; + struct lttng_ht_iter iter, uiter; + struct lttng_ht_node_str *ua_chan_node; struct ust_app *app; struct ust_app_session *ua_sess; struct ust_app_channel *ua_chan; @@ -1642,23 +1612,22 @@ int ust_app_disable_all_event_glb(struct ltt_ust_session *usess, rcu_read_lock(); /* For all registered applications */ - cds_lfht_for_each_entry(ust_app_ht, &iter, app, node) { - struct cds_lfht_iter uiter; - + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { ua_sess = lookup_session_by_app(usess, app); /* If ua_sess is NULL, there is a code flow error */ assert(ua_sess); /* Lookup channel in the ust app session */ - ua_chan_node = hashtable_lookup(ua_sess->channels, (void *)uchan->name, - strlen(uchan->name), &uiter); + lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter); + ua_chan_node = lttng_ht_iter_get_node_str(&uiter); /* If the channel is not found, there is a code flow error */ assert(ua_chan_node); ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node); /* Disable each events of channel */ - cds_lfht_for_each_entry(ua_chan->events, &uiter, ua_event, node) { + cds_lfht_for_each_entry(ua_chan->events->ht, &uiter.iter, ua_event, + node.node) { ret = disable_ust_app_event(ua_sess, ua_event, app); if (ret < 0) { /* XXX: Report error someday... */ @@ -1679,7 +1648,7 @@ int ust_app_create_channel_glb(struct ltt_ust_session *usess, struct ltt_ust_channel *uchan) { int ret = 0; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ust_app *app; struct ust_app_session *ua_sess; struct ust_app_channel *ua_chan; @@ -1696,7 +1665,7 @@ int ust_app_create_channel_glb(struct ltt_ust_session *usess, rcu_read_lock(); /* For every registered applications */ - cds_lfht_for_each_entry(ust_app_ht, &iter, app, node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { /* * Create session on the tracer side and add it to app session HT. 
Note * that if session exist, it will simply return a pointer to the ust @@ -1727,8 +1696,8 @@ int ust_app_enable_event_glb(struct ltt_ust_session *usess, struct ltt_ust_channel *uchan, struct ltt_ust_event *uevent) { int ret = 0; - struct cds_lfht_iter iter; - struct cds_lfht_node *ua_chan_node, *ua_event_node; + struct lttng_ht_iter iter, uiter; + struct lttng_ht_node_str *ua_chan_node, *ua_event_node; struct ust_app *app; struct ust_app_session *ua_sess; struct ust_app_channel *ua_chan; @@ -1746,23 +1715,21 @@ int ust_app_enable_event_glb(struct ltt_ust_session *usess, rcu_read_lock(); /* For all registered applications */ - cds_lfht_for_each_entry(ust_app_ht, &iter, app, node) { - struct cds_lfht_iter uiter; - + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { ua_sess = lookup_session_by_app(usess, app); /* If ua_sess is NULL, there is a code flow error */ assert(ua_sess); /* Lookup channel in the ust app session */ - ua_chan_node = hashtable_lookup(ua_sess->channels, (void *)uchan->name, - strlen(uchan->name), &uiter); + lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter); + ua_chan_node = lttng_ht_iter_get_node_str(&uiter); /* If the channel is not found, there is a code flow error */ assert(ua_chan_node); ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node); - ua_event_node = hashtable_lookup(ua_chan->events, - (void*)uevent->attr.name, strlen(uevent->attr.name), &uiter); + lttng_ht_lookup(ua_chan->events, (void*)uevent->attr.name, &uiter); + ua_event_node = lttng_ht_iter_get_node_str(&uiter); if (ua_event_node == NULL) { DBG3("UST app enable event %s not found for app PID %d." "Skipping app", uevent->attr.name, app->key.pid); @@ -1789,8 +1756,8 @@ int ust_app_create_event_glb(struct ltt_ust_session *usess, struct ltt_ust_channel *uchan, struct ltt_ust_event *uevent) { int ret = 0; - struct cds_lfht_iter iter; - struct cds_lfht_node *ua_chan_node; + struct lttng_ht_iter iter, uiter; + struct lttng_ht_node_str *ua_chan_node; struct ust_app *app; struct ust_app_session *ua_sess; struct ust_app_channel *ua_chan; @@ -1807,16 +1774,14 @@ int ust_app_create_event_glb(struct ltt_ust_session *usess, rcu_read_lock(); /* For all registered applications */ - cds_lfht_for_each_entry(ust_app_ht, &iter, app, node) { - struct cds_lfht_iter uiter; - + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { ua_sess = lookup_session_by_app(usess, app); /* If ua_sess is NULL, there is a code flow error */ assert(ua_sess); /* Lookup channel in the ust app session */ - ua_chan_node = hashtable_lookup(ua_sess->channels, (void *)uchan->name, - strlen(uchan->name), &uiter); + lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter); + ua_chan_node = lttng_ht_iter_get_node_str(&uiter); /* If the channel is not found, there is a code flow error */ assert(ua_chan_node); @@ -1839,7 +1804,7 @@ int ust_app_create_event_glb(struct ltt_ust_session *usess, int ust_app_start_trace(struct ltt_ust_session *usess, struct ust_app *app) { int ret = 0; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ust_app_session *ua_sess; struct ust_app_channel *ua_chan; struct ltt_ust_stream *ustream; @@ -1865,7 +1830,8 @@ int ust_app_start_trace(struct ltt_ust_session *usess, struct ust_app *app) } /* For each channel */ - cds_lfht_for_each_entry(ua_sess->channels, &iter, ua_chan, node) { + cds_lfht_for_each_entry(ua_sess->channels->ht, &iter.iter, ua_chan, + node.node) { /* Create all streams */ while (1) { /* Create UST stream */ @@ -1942,7 
+1908,7 @@ error_rcu_unlock: int ust_app_stop_trace(struct ltt_ust_session *usess, struct ust_app *app) { int ret = 0; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ust_app_session *ua_sess; struct ust_app_channel *ua_chan; @@ -1967,7 +1933,8 @@ int ust_app_stop_trace(struct ltt_ust_session *usess, struct ust_app *app) ustctl_wait_quiescent(app->key.sock); /* Flushing buffers */ - cds_lfht_for_each_entry(ua_sess->channels, &iter, ua_chan, node) { + cds_lfht_for_each_entry(ua_sess->channels->ht, &iter.iter, ua_chan, + node.node) { ret = ustctl_sock_flush_buffer(app->key.sock, ua_chan->obj); if (ret < 0) { ERR("UST app PID %d channel %s flush failed", @@ -2001,8 +1968,8 @@ int ust_app_destroy_trace(struct ltt_ust_session *usess, struct ust_app *app) { struct ust_app_session *ua_sess; struct lttng_ust_object_data obj; - struct cds_lfht_iter iter; - struct cds_lfht_node *node; + struct lttng_ht_iter iter; + struct lttng_ht_node_ulong *node; int ret; DBG("Destroy tracing for ust app pid %d", app->key.pid); @@ -2010,13 +1977,13 @@ int ust_app_destroy_trace(struct ltt_ust_session *usess, struct ust_app *app) rcu_read_lock(); __lookup_session_by_app(usess, app, &iter); - node = hashtable_iter_get_node(&iter); + node = lttng_ht_iter_get_node_ulong(&iter); if (node == NULL) { /* Only malloc can failed so something is really wrong */ goto error_rcu_unlock; } ua_sess = caa_container_of(node, struct ust_app_session, node); - ret = hashtable_del(app->sessions, &iter); + ret = lttng_ht_del(app->sessions, &iter); assert(!ret); delete_ust_app_session(app->key.sock, ua_sess); obj.handle = ua_sess->handle; @@ -2043,14 +2010,14 @@ error_rcu_unlock: int ust_app_start_trace_all(struct ltt_ust_session *usess) { int ret = 0; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ust_app *app; DBG("Starting all UST traces"); rcu_read_lock(); - cds_lfht_for_each_entry(ust_app_ht, &iter, app, node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { ret = ust_app_start_trace(usess, app); if (ret < 0) { /* Continue to next apps even on error */ @@ -2069,14 +2036,14 @@ int ust_app_start_trace_all(struct ltt_ust_session *usess) int ust_app_stop_trace_all(struct ltt_ust_session *usess) { int ret = 0; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ust_app *app; DBG("Stopping all UST traces"); rcu_read_lock(); - cds_lfht_for_each_entry(ust_app_ht, &iter, app, node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { ret = ust_app_stop_trace(usess, app); if (ret < 0) { /* Continue to next apps even on error */ @@ -2095,14 +2062,14 @@ int ust_app_stop_trace_all(struct ltt_ust_session *usess) int ust_app_destroy_trace_all(struct ltt_ust_session *usess) { int ret = 0; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter; struct ust_app *app; DBG("Destroy all UST traces"); rcu_read_lock(); - cds_lfht_for_each_entry(ust_app_ht, &iter, app, node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { ret = ust_app_destroy_trace(usess, app); if (ret < 0) { /* Continue to next apps even on error */ @@ -2121,7 +2088,7 @@ int ust_app_destroy_trace_all(struct ltt_ust_session *usess) void ust_app_global_update(struct ltt_ust_session *usess, int sock) { int ret = 0; - struct cds_lfht_iter iter; + struct lttng_ht_iter iter, uiter; struct ust_app *app; struct ust_app_session *ua_sess; struct ust_app_channel *ua_chan; @@ -2153,9 +2120,8 @@ void ust_app_global_update(struct ltt_ust_session *usess, int sock) * app session above 
made a shadow copy of the UST global domain from the * ltt ust session. */ - cds_lfht_for_each_entry(ua_sess->channels, &iter, ua_chan, node) { - struct cds_lfht_iter uiter; - + cds_lfht_for_each_entry(ua_sess->channels->ht, &iter.iter, ua_chan, + node.node) { ret = create_ust_channel(app, ua_sess, ua_chan); if (ret < 0) { /* FIXME: Should we quit here or continue... */ @@ -2163,7 +2129,8 @@ void ust_app_global_update(struct ltt_ust_session *usess, int sock) } /* For each events */ - cds_lfht_for_each_entry(ua_chan->events, &uiter, ua_event, node) { + cds_lfht_for_each_entry(ua_chan->events->ht, &uiter.iter, ua_event, + node.node) { ret = create_ust_event(app, ua_sess, ua_chan, ua_event); if (ret < 0) { /* FIXME: Should we quit here or continue... */ @@ -2193,25 +2160,23 @@ int ust_app_add_ctx_channel_glb(struct ltt_ust_session *usess, struct ltt_ust_channel *uchan, struct ltt_ust_context *uctx) { int ret = 0; - struct cds_lfht_node *ua_chan_node; - struct cds_lfht_iter iter; + struct lttng_ht_node_str *ua_chan_node; + struct lttng_ht_iter iter, uiter; struct ust_app_channel *ua_chan = NULL; struct ust_app_session *ua_sess; struct ust_app *app; rcu_read_lock(); - cds_lfht_for_each_entry(ust_app_ht, &iter, app, node) { - struct cds_lfht_iter uiter; - + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { ua_sess = lookup_session_by_app(usess, app); if (ua_sess == NULL) { continue; } /* Lookup channel in the ust app session */ - ua_chan_node = hashtable_lookup(ua_sess->channels, - (void *)uchan->name, strlen(uchan->name), &uiter); + lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter); + ua_chan_node = lttng_ht_iter_get_node_str(&uiter); if (ua_chan_node == NULL) { continue; } @@ -2236,8 +2201,8 @@ int ust_app_add_ctx_event_glb(struct ltt_ust_session *usess, struct ltt_ust_context *uctx) { int ret = 0; - struct cds_lfht_node *ua_chan_node, *ua_event_node; - struct cds_lfht_iter iter; + struct lttng_ht_node_str *ua_chan_node, *ua_event_node; + struct lttng_ht_iter iter, uiter; struct ust_app_session *ua_sess; struct ust_app_event *ua_event; struct ust_app_channel *ua_chan = NULL; @@ -2245,25 +2210,23 @@ int ust_app_add_ctx_event_glb(struct ltt_ust_session *usess, rcu_read_lock(); - cds_lfht_for_each_entry(ust_app_ht, &iter, app, node) { - struct cds_lfht_iter uiter; - + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { ua_sess = lookup_session_by_app(usess, app); if (ua_sess == NULL) { continue; } /* Lookup channel in the ust app session */ - ua_chan_node = hashtable_lookup(ua_sess->channels, - (void *)uchan->name, strlen(uchan->name), &uiter); + lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter); + ua_chan_node = lttng_ht_iter_get_node_str(&uiter); if (ua_chan_node == NULL) { continue; } ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node); - ua_event_node = hashtable_lookup(ua_chan->events, - (void *)uevent->attr.name, strlen(uevent->attr.name), &uiter); + lttng_ht_lookup(ua_chan->events, (void *)uevent->attr.name, &uiter); + ua_event_node = lttng_ht_iter_get_node_str(&uiter); if (ua_event_node == NULL) { continue; } @@ -2287,8 +2250,8 @@ int ust_app_enable_event_pid(struct ltt_ust_session *usess, struct ltt_ust_channel *uchan, struct ltt_ust_event *uevent, pid_t pid) { int ret = 0; - struct cds_lfht_iter iter; - struct cds_lfht_node *ua_chan_node, *ua_event_node; + struct lttng_ht_iter iter; + struct lttng_ht_node_str *ua_chan_node, *ua_event_node; struct ust_app *app; struct ust_app_session *ua_sess; struct 
ust_app_channel *ua_chan; @@ -2310,15 +2273,15 @@ int ust_app_enable_event_pid(struct ltt_ust_session *usess, assert(ua_sess); /* Lookup channel in the ust app session */ - ua_chan_node = hashtable_lookup(ua_sess->channels, (void *)uchan->name, - strlen(uchan->name), &iter); + lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &iter); + ua_chan_node = lttng_ht_iter_get_node_str(&iter); /* If the channel is not found, there is a code flow error */ assert(ua_chan_node); ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node); - ua_event_node = hashtable_lookup(ua_chan->events, - (void*)uevent->attr.name, strlen(uevent->attr.name), &iter); + lttng_ht_lookup(ua_chan->events, (void *)uevent->attr.name, &iter); + ua_event_node = lttng_ht_iter_get_node_str(&iter); if (ua_event_node == NULL) { ret = create_ust_app_event(ua_sess, ua_chan, uevent, app); if (ret < 0) { @@ -2345,8 +2308,8 @@ int ust_app_disable_event_pid(struct ltt_ust_session *usess, struct ltt_ust_channel *uchan, struct ltt_ust_event *uevent, pid_t pid) { int ret = 0; - struct cds_lfht_iter iter; - struct cds_lfht_node *ua_chan_node, *ua_event_node; + struct lttng_ht_iter iter; + struct lttng_ht_node_str *ua_chan_node, *ua_event_node; struct ust_app *app; struct ust_app_session *ua_sess; struct ust_app_channel *ua_chan; @@ -2368,16 +2331,16 @@ int ust_app_disable_event_pid(struct ltt_ust_session *usess, assert(ua_sess); /* Lookup channel in the ust app session */ - ua_chan_node = hashtable_lookup(ua_sess->channels, (void *)uchan->name, - strlen(uchan->name), &iter); + lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &iter); + ua_chan_node = lttng_ht_iter_get_node_str(&iter); if (ua_chan_node == NULL) { /* Channel does not exist, skip disabling */ goto error; } ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node); - ua_event_node = hashtable_lookup(ua_chan->events, - (void*)uevent->attr.name, strlen(uevent->attr.name), &iter); + lttng_ht_lookup(ua_chan->events, (void *)uevent->attr.name, &iter); + ua_event_node = lttng_ht_iter_get_node_str(&iter); if (ua_event_node == NULL) { /* Event does not exist, skip disabling */ goto error; diff --git a/lttng-sessiond/ust-app.h b/lttng-sessiond/ust-app.h index 0c753ab36..8cc049aef 100644 --- a/lttng-sessiond/ust-app.h +++ b/lttng-sessiond/ust-app.h @@ -45,21 +45,21 @@ struct ust_register_msg { /* * Global applications HT used by the session daemon. 
*/ -struct cds_lfht *ust_app_ht; +struct lttng_ht *ust_app_ht; -struct cds_lfht *ust_app_sock_key_map; +struct lttng_ht *ust_app_sock_key_map; struct ust_app_key { pid_t pid; int sock; - struct cds_lfht_node node; + struct lttng_ht_node_ulong node; }; struct ust_app_ctx { int handle; struct lttng_ust_context ctx; struct lttng_ust_object_data *obj; - struct cds_lfht_node node; + struct lttng_ht_node_ulong node; }; struct ust_app_event { @@ -68,8 +68,8 @@ struct ust_app_event { struct lttng_ust_object_data *obj; struct lttng_ust_event attr; char name[LTTNG_UST_SYM_NAME_LEN]; - struct cds_lfht *ctx; - struct cds_lfht_node node; + struct lttng_ht *ctx; + struct lttng_ht_node_str node; }; struct ust_app_channel { @@ -79,9 +79,9 @@ struct ust_app_channel { struct lttng_ust_channel attr; struct lttng_ust_object_data *obj; struct ltt_ust_stream_list streams; - struct cds_lfht *ctx; - struct cds_lfht *events; - struct cds_lfht_node node; + struct lttng_ht *ctx; + struct lttng_ht *events; + struct lttng_ht_node_str node; }; struct ust_app_session { @@ -91,12 +91,12 @@ struct ust_app_session { int handle; /* used has unique identifier for app session */ int id; /* session unique identifier */ struct ltt_ust_metadata *metadata; - struct cds_lfht *channels; /* Registered channels */ - struct cds_lfht_node node; + struct lttng_ht *channels; /* Registered channels */ + struct lttng_ht_node_ulong node; + char path[PATH_MAX]; /* UID/GID of the user owning the session */ uid_t uid; gid_t gid; - char path[PATH_MAX]; }; /* @@ -111,8 +111,8 @@ struct ust_app { uint32_t v_major; /* Verion major number */ uint32_t v_minor; /* Verion minor number */ char name[17]; /* Process name (short) */ - struct cds_lfht *sessions; - struct cds_lfht_node node; + struct lttng_ht *sessions; + struct lttng_ht_node_ulong node; struct ust_app_key key; }; @@ -159,7 +159,7 @@ void ust_app_global_update(struct ltt_ust_session *usess, int sock); void ust_app_clean_list(void); void ust_app_ht_alloc(void); -struct cds_lfht *ust_app_get_ht(void); +struct lttng_ht *ust_app_get_ht(void); struct ust_app *ust_app_find_by_pid(pid_t pid); #else /* HAVE_LIBLTTNG_UST_CTL */ @@ -226,7 +226,7 @@ struct ust_app *ust_app_get_by_pid(pid_t pid) return NULL; } static inline -struct cds_lfht *ust_app_get_ht(void) +struct lttng_ht *ust_app_get_ht(void) { return NULL; } diff --git a/lttng-sessiond/ust-consumer.c b/lttng-sessiond/ust-consumer.c index d6b80ff64..c02cbf965 100644 --- a/lttng-sessiond/ust-consumer.c +++ b/lttng-sessiond/ust-consumer.c @@ -23,11 +23,11 @@ #include #include +#include #include #include #include -#include "../common/hashtable.h" #include "ust-consumer.h" /* @@ -120,10 +120,9 @@ int ust_consumer_send_session(int consumer_fd, struct ust_app_session *usess) { int ret = 0; int sock = consumer_fd; - struct cds_lfht_iter iter; - struct cds_lfht_node *node; + struct lttng_ht_iter iter; struct lttcomm_consumer_msg lum; - struct ust_app_channel *uchan; + struct ust_app_channel *ua_chan; DBG("Sending metadata stream fd"); @@ -182,17 +181,13 @@ int ust_consumer_send_session(int consumer_fd, struct ust_app_session *usess) /* Send each channel fd streams of session */ rcu_read_lock(); - hashtable_get_first(usess->channels, &iter); - while ((node = hashtable_iter_get_node(&iter)) != NULL) { - uchan = caa_container_of(node, struct ust_app_channel, node); - - ret = send_channel_streams(sock, uchan, usess->uid, - usess->gid); + cds_lfht_for_each_entry(usess->channels->ht, &iter.iter, ua_chan, + node.node) { + ret = send_channel_streams(sock, 
ua_chan, usess->uid, usess->gid); if (ret < 0) { rcu_read_unlock(); goto error; } - hashtable_get_next(usess->channels, &iter); } rcu_read_unlock();
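
For readers following the conversion, the new liblttng-ht calls used throughout the hunks above compose into one small pattern: a typed node embedded in the object, a lookup that fills an iterator, a typed accessor on that iterator, and RCU-protected iteration over the wrapped cds_lfht. The sketch below is not part of the patch; it simply strings those calls together around a hypothetical string-keyed table. The struct, function and variable names are illustrative only, and the header names are assumed since the #include lines are elided in this plain-text diff.

#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <urcu.h>	/* rcu_read_lock()/rcu_read_unlock(), caa_container_of() - assumed include */
#include <lttng-ht.h>	/* assumed header name for the new liblttng-ht library */

struct example_channel {
	char name[64];
	struct lttng_ht_node_str node;	/* string-keyed node, as in ust_app_channel */
};

static void example_usage(void)
{
	int ret;
	struct lttng_ht *channels;
	struct lttng_ht_iter iter;
	struct lttng_ht_node_str *node;
	struct example_channel *chan;

	/* Size 0 falls back to the default size, like the old hashtable_new_str(0) */
	channels = lttng_ht_new(0, LTTNG_HT_TYPE_STRING);

	/* Add: key the node on the embedded name, then add-unique */
	chan = calloc(1, sizeof(*chan));
	strncpy(chan->name, "metadata", sizeof(chan->name) - 1);
	lttng_ht_node_init_str(&chan->node, chan->name);
	rcu_read_lock();
	lttng_ht_add_unique_str(channels, &chan->node);
	rcu_read_unlock();

	/* Lookup: the iterator is filled first, the typed node extracted after */
	rcu_read_lock();
	lttng_ht_lookup(channels, (void *) chan->name, &iter);
	node = lttng_ht_iter_get_node_str(&iter);
	if (node != NULL) {
		chan = caa_container_of(node, struct example_channel, node);
		/* ... use chan ... */
	}
	rcu_read_unlock();

	/* Wipe: iterate on the wrapped cds_lfht, delete through the wrapper */
	rcu_read_lock();
	cds_lfht_for_each_entry(channels->ht, &iter.iter, chan, node.node) {
		ret = lttng_ht_del(channels, &iter);
		assert(!ret);
		/* Real code defers reclamation to a grace period with call_rcu(),
		 * as delete_ust_app_rcu() does above; the object is not freed here. */
	}
	rcu_read_unlock();

	/* Destroy only once the table is empty, as in ust_app_clean_list() */
	lttng_ht_destroy(channels);
}

The unsigned-long-keyed tables (app sessions, contexts, and the pid/sock maps) follow the same shape, substituting struct lttng_ht_node_ulong, lttng_ht_node_init_ulong(), lttng_ht_add_unique_ulong() and lttng_ht_iter_get_node_ulong(), with the key cast to (void *)((unsigned long) value) for lttng_ht_lookup().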