64-bit: 1 byte + 113 bytes + 6 bytes pad + 113*8 = 1024 bytes
-- Type B: bitmap, followed by pointers array.
+- Type B: pools of values and pointers arrays
-bitmask (256 entries -> 256 bits -> 32 bytes) of populated children
-followed by an array of children pointers, in same order as appears in
-the bitmask
+Pools of values and pointers arrays. Each pool's values array is at most
+32 bytes in size (so it fits in an L1 cacheline). Each pool begins with an
+8-bit integer, which is the number of children in this pool, followed by
+an array of 8-bit values, padding, and an array of pointers. Values and
+pointer arrays are associated as in Type A.
-+ Allows lookup failure to use 32-byte cache-line only. (1 cacheline)
-+ Allows lookup success to use 32-byte cache-line (bitmap),
- followed by direct lookup into pointer array. (2 cachelines)
+The entries of a node are assigned to their respective pool based on
+the most significant bits of their index.
-Filled at 8 entries 32-bit, 12 entries 64-bit
-32-bit: 32 + (4*8) -> 64 bytes
-64-bit: 32 + (8*12) -> 128 bytes
++ Allows lookup failure to use a single 32-byte cache-line only. (1 cacheline)
+ Lookup success uses 2 cache lines.
-Filled at 24 entries 32-bit, 28 entries 64-bit
-32-bit: 32 + (4*24) -> 128 bytes
-64-bit: 32 + (8*28) -> 256 bytes
++ Allows in-place updates without reallocation, except when a pool is
+ full. (this was not possible with bitmap-based nodes)
+- If one pool exhausts its space, we need to increase the node size.
+ Therefore, for very dense populations, we will end up using the
+ pigeon-hole node type sooner, thus consuming more space.
-Filled at 56 entries 32-bit, 60 entries 64-bit
-32-bit: 32 + (4*56) -> 256 bytes
-64-bit: 32 + (8*60) -> 512 bytes
+Pool configuration:
-Filled at 120 entries 32-bit, 124 entries 64-bit
-32-bit: 32 + (4*95) -> 512 bytes
-64-bit: 32 + (8*124) -> 1024 bytes
+Per pool, filled at 25 entries (32-bit), 28 entries (64-bit)
+32-bit: 1 byte + 25 bytes + 2 bytes pad + 25*4bytes = 128 bytes
+64-bit: 1 byte + 28 bytes + 3 bytes pad + 28*8 = 256 bytes
+
+Total up to 50 entries (32-bit), 56 entries (64-bit)
+2 pools: 32-bit = 256 bytes
+2 pools: 64-bit = 512 bytes
+
+Total up to 100 entries (32-bit), 112 entries (64-bit)
+4 pools: 32-bit = 512 bytes
+4 pools: 64-bit = 1024 bytes
- Type C: pigeon-hole array
up to 12 children
-- Type B: bitmap, followed by pointers array.
-- 2 cache line hit for lookup success
- 128 bytes storage
-up to 24 children
+up to 25 children
+
+- Type B: pool
- 256 bytes storage
-up to 56 children
+
+up to 50 children
- 512 bytes storage
-up to 120 children
+up to 100 children
- Type C: pigeon-hole array
- 1 cache line hit for lookup success
up to 14 children
-- Type B: bitmap, followed by pointers array.
-- 2 cache line hit for lookup success
- 256 bytes storage
up to 28 children
+- Type B: pool
+
- 512 bytes storage
-up to 60 children
+up to 56 children
- 1024 bytes storage
-up to 124 children
+up to 112 children
- Type C: pigeon-hole array
- 1 cache line hit for lookup success
enum child_type {
RCU_JA_LINEAR = 0, /* Type A */
- /* 32-bit: 1 to 12 children, 8 to 64 bytes */
- /* 64-bit: 1 to 14 children, 16 to 128 bytes */
- RCU_JA_BITMAP = 1, /* Type B */
- /* 32-bit: 13 to 120 children, 128 to 512 bytes */
- /* 64-bit: 15 to 124 children, 256 to 1024 bytes */
+ /* 32-bit: 1 to 25 children, 8 to 128 bytes */
+ /* 64-bit: 1 to 28 children, 16 to 256 bytes */
+ RCU_JA_POOL = 1, /* Type B */
+ /* 32-bit: 26 to 100 children, 256 to 512 bytes */
+ /* 64-bit: 29 to 112 children, 512 to 1024 bytes */
RCU_JA_PIGEON = 2, /* Type C */
- /* 32-bit: 121 to 256 children, 1024 bytes */
- /* 64-bit: 125 to 256 children, 2048 bytes */
+ /* 32-bit: 101 to 256 children, 1024 bytes */
+ /* 64-bit: 113 to 256 children, 2048 bytes */
/* Leaf nodes are implicit from their height in the tree */
};
enum rcu_ja_type_class {
RCU_JA_LINEAR = 0, /* Type A */
- /* 32-bit: 1 to 12 children, 8 to 64 bytes */
- /* 64-bit: 1 to 14 children, 16 to 128 bytes */
- RCU_JA_BITMAP = 1, /* Type B */
- /* 32-bit: 13 to 120 children, 128 to 512 bytes */
- /* 64-bit: 15 to 124 children, 256 to 1024 bytes */
+ /* 32-bit: 1 to 25 children, 8 to 128 bytes */
+ /* 64-bit: 1 to 28 children, 16 to 256 bytes */
+ RCU_JA_POOL = 1, /* Type B */
+ /* 32-bit: 26 to 100 children, 256 to 512 bytes */
+ /* 64-bit: 29 to 112 children, 512 to 1024 bytes */
RCU_JA_PIGEON = 2, /* Type C */
- /* 32-bit: 121 to 256 children, 1024 bytes */
- /* 64-bit: 125 to 256 children, 2048 bytes */
+ /* 32-bit: 101 to 256 children, 1024 bytes */
+ /* 64-bit: 113 to 256 children, 2048 bytes */
/* Leaf nodes are implicit from their height in the tree */
};
uint16_t min_child; /* minimum number of children: 1 to 256 */
uint16_t max_child; /* maximum number of children: 1 to 256 */
uint16_t order; /* node size is (1 << order), in bytes */
+ uint16_t nr_pool_order; /* number of pools is (1 << nr_pool_order) */
+ uint16_t pool_size_order; /* pool size is (1 << pool_size_order), in bytes */
};
/*
{ .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = 3, .order = 4, },
{ .type_class = RCU_JA_LINEAR, .min_child = 3, .max_child = 6, .order = 5, },
{ .type_class = RCU_JA_LINEAR, .min_child = 4, .max_child = 12, .order = 6, },
+ { .type_class = RCU_JA_LINEAR, .min_child = 10, .max_child = 25, .order = 7, },
- { .type_class = RCU_JA_BITMAP, .min_child = 10, .max_child = 24, .order = 7, },
- { .type_class = RCU_JA_BITMAP, .min_child = 20, .max_child = 56, .order = 8, },
- { .type_class = RCU_JA_BITMAP, .min_child = 46, .max_child = 120, .order = 9, },
+ /* Pools may fill sooner than max_child */
+ { .type_class = RCU_JA_POOL, .min_child = 20, .max_child = 50, .order = 8, .nr_pool_order = 1, .pool_size_order = 7, },
+ { .type_class = RCU_JA_POOL, .min_child = 42, .max_child = 100, .order = 9, .nr_pool_order = 2, .pool_size_order = 7, },
- { .type_class = RCU_JA_PIGEON, .min_child = 100, .max_child = 256, .order = 10, },
+ /* TODO: Upon downsize, if at least one pool is filled, we need to keep pigeon */
+ { .type_class = RCU_JA_PIGEON, .min_child = 90, .max_child = 256, .order = 10, },
};
CAA_BUILD_BUG_ON(CAA_ARRAY_SIZE(ja_types) > JA_TYPE_MAX_NR);
#else /* !(CAA_BITS_PER_LONG < 64) */
{ .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = 3, .order = 5, },
{ .type_class = RCU_JA_LINEAR, .min_child = 3, .max_child = 7, .order = 6, },
{ .type_class = RCU_JA_LINEAR, .min_child = 5, .max_child = 14, .order = 7, },
+ { .type_class = RCU_JA_LINEAR, .min_child = 10, .max_child = 28, .order = 8, },
- { .type_class = RCU_JA_BITMAP, .min_child = 10, .max_child = 28, .order = 8, },
- { .type_class = RCU_JA_BITMAP, .min_child = 22, .max_child = 60, .order = 9, },
- { .type_class = RCU_JA_BITMAP, .min_child = 49, .max_child = 124, .order = 10, },
+ /* Pools may fill sooner than max_child */
+ { .type_class = RCU_JA_POOL, .min_child = 22, .max_child = 56, .order = 9, .nr_pool_order = 1, .pool_size_order = 8, },
+ { .type_class = RCU_JA_POOL, .min_child = 44, .max_child = 112, .order = 10, .nr_pool_order = 2, .pool_size_order = 8, },
- { .type_class = RCU_JA_PIGEON, .min_child = 102, .max_child = 256, .order = 11, },
+ /* TODO: Upon downsize, if at least one pool is filled, we need to keep pigeon */
+ { .type_class = RCU_JA_PIGEON, .min_child = 100, .max_child = 256, .order = 11, },
};
CAA_BUILD_BUG_ON(CAA_ARRAY_SIZE(ja_types) > JA_TYPE_MAX_NR);
#endif /* !(BITS_PER_LONG < 64) */
free(node);
}
-/* The bitmap for 256 entries is always 32 bytes */
-#define CHAR_BIT_SHIFT 3UL
-#define CHAR_BIT_MASK ((1UL << CHAR_BIT_SHIFT) - 1)
-#if (CHAR_BIT != (1UL << CHAR_BIT_SHIFT))
-#error "char size not supported."
-#endif
-
-#define ULONG_BIT_MASK (CAA_BITS_PER_LONG - 1)
-
-#define JA_BITMAP_BITS JA_ENTRY_PER_NODE
-#define JA_BITMAP_LEN (JA_BITMAP_BITS / CHAR_BIT)
-
#define __JA_ALIGN_MASK(v, mask) (((v) + (mask)) & ~(mask))
#define JA_ALIGN(v, align) __JA_ALIGN_MASK(v, (typeof(v)) (align) - 1)
#define __JA_FLOOR_MASK(v, mask) ((v) & ~(mask))
struct rcu_ja_node_flag *ptr;
unsigned int i;
- assert(type->type_class == RCU_JA_LINEAR);
+ assert(!type || type->type_class == RCU_JA_LINEAR);
nr_child = node->data[0];
cmm_smp_rmb(); /* read nr_child before values */
- assert(nr_child <= type->max_child);
- assert(nr_child >= type->min_child);
+ assert(!type || nr_child <= type->max_child);
+ assert(!type || nr_child >= type->min_child);
values = &node[1];
for (i = 0; i < nr_child; i++) {
return ptr;
}
-#if 0
-/*
- * Count hweight. Expect most bits to be 0. Algorithm from
- * Wegner (1960): count those in n steps (n being the number of
- * hot bits). Ref.: Wegner, Peter (1960), "A technique for
- * counting ones in a binary computer", Communications of the
- * ACM 3 (5): 322, doi:10.1145/367236.367286.
- */
-static
-unsigned int ja_hweight_uchar(unsigned char value)
-{
- unsigned int count = 0;
-
- for (; value; count++)
- value &= value - 1;
- return count;
-}
-#endif //0
-
-#if (CAA_BITS_PER_LONG < 64)
-static
-unsigned int ja_hweight_ulong(unsigned long value)
-{
- unsigned long r;
-
- r = value;
- r = r - ((r >> 1) & 0x55555555);
- r = (r & 0x33333333) + ((r >> 2) & 0x33333333);
- r += r >> 4;
- r &= 0x0F0F0F0F;
- r += r >> 8;
- r += r >> 16;
- r &= 0x000000FF;
- return r;
-}
-#else /* !(CAA_BITS_PER_LONG < 64) */
-static
-unsigned int ja_hweight_ulong(unsigned long value)
-{
- unsigned long r;
-
- r = value;
- r = r - ((r >> 1) & 0x5555555555555555UL);
- r = (r & 0x3333333333333333UL) + ((r >> 2) & 0x3333333333333333UL);
- r += r >> 4;
- r &= 0x0F0F0F0F0F0F0F0FUL;
- r += r >> 8;
- r += r >> 16;
- r += r >> 32;
- r &= 0x00000000000000FFUL;
- return r;
-}
-#endif /* !(BITS_PER_LONG < 64) */
-
static
-struct rcu_ja_node_flag *ja_bitmap_node_get_nth(const struct rcu_ja_type *type,
+struct rcu_ja_node_flag *ja_pool_node_get_nth(const struct rcu_ja_type *type,
struct rcu_ja_node *node,
uint8_t n)
{
- uint8_t *bitmap;
- uint8_t byte_nr;
- struct rcu_ja_node_flag *pointers;
struct rcu_ja_node_flag *ptr;
- unsigned int count;
-
- assert(type->type_class == RCU_JA_BITMAP);
-
- bitmap = &node->data[0];
- /*
- * Check if n is hot in the bitmap. If yes, count the hweight
- * prior to n, including n, to get the pointer index.
- * The bitmap goes from least significant (0) to most
- * significant (255) as bytes increase.
- */
- byte_nr = n >> CHAR_BIT_SHIFT;
- if (bitmap[byte_nr] & (1U << (n & CHAR_BIT_MASK))) {
- uint8_t byte_iter;
- unsigned long v;
-
- count = 0;
- /* Count entire ulong prior to the one containing n */
- for (byte_iter = 0; byte_iter < JA_FLOOR(byte_nr, sizeof(unsigned long));
- byte_iter += sizeof(unsigned long)) {
- v = *((unsigned long *) &bitmap[byte_iter]);
- count += ja_hweight_ulong(v);
- }
- /*
- * Read only the bits prior to and including n within
- * the ulong containing n. ja_bitfield_read_le goes from
- * less significant to most significant as bytes
- * increase.
- */
- ja_bitfield_read_le(
- (unsigned long *) &bitmap[JA_FLOOR(byte_nr, sizeof(unsigned long))],
- unsigned long, 0, (n & ULONG_BIT_MASK) + 1,
- &v);
- count += ja_hweight_ulong(v);
- } else {
- return NULL;
- }
+ struct rcu_ja_node *linear;
- assert(count <= type->max_child);
- assert(count >= type->min_child);
-
- cmm_smp_rmb(); /* read bitmap before pointers */
- pointers = &bitmap[JA_BITMAP_LEN];
- ptr = pointers[count - 1];
- assert(ja_node_ptr(ptr) != NULL);
- return ptr;
+ assert(type->type_class == RCU_JA_POOL);
+ linear = (struct rcu_ja_node *)
+ &node->data[(n >> (CHAR_BIT - type->nr_pool_order)) << type->pool_size_order];
+ return ja_linear_node_get_nth(NULL, linear, n);
}
static
switch (type->type_class) {
case RCU_JA_LINEAR:
return ja_linear_node_get_nth(type, node, n);
- case RCU_JA_BITMAP:
- return ja_bitmap_node_get_nth(type, node, n);
+ case RCU_JA_POOL:
+ return ja_pool_node_get_nth(type, node, n);
case RCU_JA_PIGEON:
return ja_pigeon_node_get_nth(type, node, n);
default: