From: Olivier Dion Date: Fri, 18 Oct 2024 15:41:11 +0000 (-0400) Subject: Honor URCU_DEREFERENCE_USE_VOLATILE X-Git-Url: https://git.lttng.org./?a=commitdiff_plain;h=4e87c71c3189841ac7fa223814ef0ef11d5dc9e9;p=urcu.git Honor URCU_DEREFERENCE_USE_VOLATILE When users define `URCU_DEREFERENCE_USE_VOLATILE', `rcu_dereference()' is implemented using `CMM_ACCESS_ONCE()', even when compiled with atomic builtins or if the compiler support the C11 memory model. This allows optimization on weakly-ordered architectures where the `CMM_CONSUME' memory order is implemented as `CMM_ACQUIRE'. To prevent the compiler from doing certain optimizations on pointers equivalence, introduce the `cmm_ptr_eq()' API for pointer comparisons. This also introduces the `_CMM_OPTIMIZER_HIDE_VAR()' private macro for forcing the compiler optimizer to believe that a variable can be manipulated arbitrarily, thus preventing optimizations based on pointer equivalence. Change-Id: Ib48c29370017990e6acf40a7590ab286a89691bd Signed-off-by: Olivier Dion Signed-off-by: Mathieu Desnoyers --- diff --git a/include/urcu/compiler.h b/include/urcu/compiler.h index 0e94d2f..6254252 100644 --- a/include/urcu/compiler.h +++ b/include/urcu/compiler.h @@ -51,6 +51,10 @@ # error "URCU was configured to use atomic builtins, but this toolchain does not support them." #endif +/* Make the optimizer believe the variable can be manipulated arbitrarily. */ +#define _CMM_OPTIMIZER_HIDE_VAR(var) \ + __asm__ ("" : "+r" (var)) + #ifndef caa_max #define caa_max(a,b) ((a)>(b)?(a):(b)) #endif diff --git a/include/urcu/static/pointer.h b/include/urcu/static/pointer.h index 9dc0d3e..8b87b83 100644 --- a/include/urcu/static/pointer.h +++ b/include/urcu/static/pointer.h @@ -55,6 +55,11 @@ extern "C" { * volatile access to implement rcu_dereference rather than * memory_order_consume load from the C11/C++11 standards. * + * CAUTION: If URCU_DEREFERENCE_USE_VOLATILE is defined, pointers comparisons + * _must_ be done using the `cmm_ptr_eq' static inline helper function to + * prevent the compiler optimizations from losing addresses dependencies, + * resulting in the loss of the publication guarantee. + * * This may improve performance on weakly-ordered architectures where * the compiler implements memory_order_consume as a * memory_order_acquire, which is stricter than required by the @@ -69,21 +74,114 @@ extern "C" { * meets the 10-line criterion in LGPL, allowing this function to be * expanded directly in non-LGPL code. */ - -#if !defined (URCU_DEREFERENCE_USE_VOLATILE) && \ - ((defined (__cplusplus) && __cplusplus >= 201103L) || \ - (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)) -# define __URCU_DEREFERENCE_USE_ATOMIC_CONSUME +#ifdef URCU_DEREFERENCE_USE_VOLATILE +# define _rcu_dereference(p) \ + __extension__ \ + ({ \ + cmm_smp_rmc(); \ + CMM_ACCESS_ONCE(p); \ + }) +#else +# define _rcu_dereference(p) \ + uatomic_load(&(p), CMM_CONSUME) #endif -/* - * If p is const (the pointer itself, not what it points to), using - * __typeof__(p) would declare a const variable, leading to - * -Wincompatible-pointer-types errors. Using the statement expression - * makes it an rvalue and gets rid of the const-ness. +/** + * Compare pointers A and B, but prevent the compiler from using this knowledge + * for optimization. + * + * This may introduce some overhead. For example, compilers might allocate more + * register, resulting in register spilling. + * + * For example, in the following code, `cmm_ptr_eq' prevents the compiler from + * re-using the derefence of `a' at the first call of `do_func()' for the second + * call. If a simple equality comparison was used, the compiler could use the + * value of `global->x' across two RCU critical regions. + * + * struct work { + * long x; + * } + * + * struct work *global; + * + * void func(void) + * { + * struct work *a, *b; + * + * rcu_read_lock(); + * a = rcu_dereference(global); + * if (a) + * do_func(a->x); + * rcu_read_unlock(); + * + * some_stuff(); + * + * rcu_read_lock(); + * b = rcu_dereference(global); + * if (b && cmm_ptr_eq(a, b)) // force to use b->x and not a->x + * do_func(b->x); + * rcu_read_unlock(); + * } + * + * Consider what could happen if such compiler optimization was done while the + * following mutator exists. The mutator removes a `work' node that was + * previously stored in a global location by storing NULL. It then call + * rcu_synchronize() to ensure no readers can see the node. From there, a new + * work is created for the node and the node is commited with + * rcu_assign_pointer(), thus the same pointer is now visible to readers. This + * fictional mutator is purposely trying to create a ABA problem for the sake of + * the argument: + * + * void mutator(void) + * { + * struct work *work; + * + * work = global; + * rcu_assign_pointer(global, NULL); + * rcu_synchronize(); + * work->x = new_work(); + * rcu_assign_pointer(global, work); + * } + * + * Then, the following execution could happen, where the new work assigned is + * not executed. In other words, the compiler optimizations can introduce a ABA + * problem, defeating one of RCU's purposes. + * + * Worker: Mutator: + * let a, b let work = global + * let tmp + * rcu_read_lock() + * a = rcu_dereference(global) + * if (a) { + * tmp = a->x + * do_func(tmp) + * } + * rcu_read_unlock() + * do_stuff() ... + * ... rcu_assign_pointer(global, NULL) + * ... rcu_synchronize() + * ... work->x = new_work() + * ... rcu_assign_pointer(global, work) + * rcu_read_lock() + * b = rcu_dereference(global) + * if (b && (a == b)) // BUGGY !!! + * do_func(tmp) + * rcu_read_unlock() + * + * In other words, the publication guarantee is lost here. The store to + * `work->x' in the mutator is supposed to be observable by the second + * rcu_dereference() in the worker, because of the matching + * rcu_assign_pointer(). But due to the equality comparison, the compiler + * decides to reuse the prior rcu_dereference(). */ -# define _rcu_dereference(p) \ - uatomic_load(&(p), CMM_CONSUME) +static inline int cmm_ptr_eq(const void *a, const void *b) +{ + _CMM_OPTIMIZER_HIDE_VAR(a); + _CMM_OPTIMIZER_HIDE_VAR(b); + + return a == b; +} + /** * _rcu_cmpxchg_pointer - same as rcu_assign_pointer, but tests if the pointer * is as expected by "old". If succeeds, returns the previous pointer to the